{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.949649483319732, "eval_steps": 20000, "global_step": 300000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 23.42283821105957, "learning_rate": 1.1000000000000001e-08, "loss": 0.9255, "step": 25 }, { "epoch": 0.0, "grad_norm": 54.43226623535156, "learning_rate": 2.3e-08, "loss": 2.2005, "step": 50 }, { "epoch": 0.0, "grad_norm": 35.09960174560547, "learning_rate": 3.550000000000001e-08, "loss": 0.8899, "step": 75 }, { "epoch": 0.0, "grad_norm": 53.03972625732422, "learning_rate": 4.8e-08, "loss": 2.1095, "step": 100 }, { "epoch": 0.0, "grad_norm": 24.472898483276367, "learning_rate": 6.05e-08, "loss": 0.9293, "step": 125 }, { "epoch": 0.0, "grad_norm": 49.78120422363281, "learning_rate": 7.3e-08, "loss": 2.2634, "step": 150 }, { "epoch": 0.0, "grad_norm": 17.77945327758789, "learning_rate": 8.55e-08, "loss": 0.8527, "step": 175 }, { "epoch": 0.0, "grad_norm": 47.113441467285156, "learning_rate": 9.8e-08, "loss": 1.9916, "step": 200 }, { "epoch": 0.0, "grad_norm": 24.13808822631836, "learning_rate": 1.1050000000000002e-07, "loss": 0.8935, "step": 225 }, { "epoch": 0.0, "grad_norm": 47.03296661376953, "learning_rate": 1.23e-07, "loss": 1.9753, "step": 250 }, { "epoch": 0.0, "grad_norm": 20.135156631469727, "learning_rate": 1.3550000000000002e-07, "loss": 0.8993, "step": 275 }, { "epoch": 0.0, "grad_norm": 43.35232925415039, "learning_rate": 1.4800000000000003e-07, "loss": 1.829, "step": 300 }, { "epoch": 0.0, "grad_norm": 15.744970321655273, "learning_rate": 1.605e-07, "loss": 0.7629, "step": 325 }, { "epoch": 0.0, "grad_norm": 36.265804290771484, "learning_rate": 1.73e-07, "loss": 1.6641, "step": 350 }, { "epoch": 0.0, "grad_norm": 14.246800422668457, "learning_rate": 1.8550000000000001e-07, "loss": 0.6895, "step": 375 }, { "epoch": 0.0, "grad_norm": 43.7151985168457, "learning_rate": 1.9800000000000003e-07, "loss": 1.3097, "step": 400 }, { "epoch": 0.0, "grad_norm": 26.58841323852539, "learning_rate": 2.105e-07, "loss": 0.5846, "step": 425 }, { "epoch": 0.0, "grad_norm": 43.2161865234375, "learning_rate": 2.2300000000000002e-07, "loss": 1.2725, "step": 450 }, { "epoch": 0.0, "grad_norm": 23.538555145263672, "learning_rate": 2.3550000000000004e-07, "loss": 0.5106, "step": 475 }, { "epoch": 0.0, "grad_norm": 43.29937744140625, "learning_rate": 2.48e-07, "loss": 1.0972, "step": 500 }, { "epoch": 0.01, "grad_norm": 17.562902450561523, "learning_rate": 2.6050000000000004e-07, "loss": 0.4685, "step": 525 }, { "epoch": 0.01, "grad_norm": 38.34809112548828, "learning_rate": 2.73e-07, "loss": 0.9177, "step": 550 }, { "epoch": 0.01, "grad_norm": 18.101844787597656, "learning_rate": 2.855e-07, "loss": 0.4382, "step": 575 }, { "epoch": 0.01, "grad_norm": 38.8128547668457, "learning_rate": 2.9800000000000005e-07, "loss": 0.9793, "step": 600 }, { "epoch": 0.01, "grad_norm": 21.48680305480957, "learning_rate": 3.1050000000000003e-07, "loss": 0.484, "step": 625 }, { "epoch": 0.01, "grad_norm": 36.1438102722168, "learning_rate": 3.2300000000000007e-07, "loss": 1.0183, "step": 650 }, { "epoch": 0.01, "grad_norm": 23.882946014404297, "learning_rate": 3.3550000000000006e-07, "loss": 0.4233, "step": 675 }, { "epoch": 0.01, "grad_norm": 32.39024353027344, "learning_rate": 3.48e-07, "loss": 0.7997, "step": 700 }, { "epoch": 0.01, "grad_norm": 8.52871322631836, "learning_rate": 3.6050000000000003e-07, "loss": 0.3482, "step": 725 }, { "epoch": 0.01, "grad_norm": 33.998775482177734, "learning_rate": 3.73e-07, "loss": 0.9174, "step": 750 }, { "epoch": 0.01, "grad_norm": 44.87738037109375, "learning_rate": 3.8550000000000006e-07, "loss": 0.3774, "step": 775 }, { "epoch": 0.01, "grad_norm": 31.623411178588867, "learning_rate": 3.9800000000000004e-07, "loss": 0.7539, "step": 800 }, { "epoch": 0.01, "grad_norm": 23.027366638183594, "learning_rate": 4.105000000000001e-07, "loss": 0.4131, "step": 825 }, { "epoch": 0.01, "grad_norm": 33.8479118347168, "learning_rate": 4.23e-07, "loss": 0.7987, "step": 850 }, { "epoch": 0.01, "grad_norm": 16.185195922851562, "learning_rate": 4.355e-07, "loss": 0.3946, "step": 875 }, { "epoch": 0.01, "grad_norm": 34.58127975463867, "learning_rate": 4.4800000000000004e-07, "loss": 0.838, "step": 900 }, { "epoch": 0.01, "grad_norm": 18.806821823120117, "learning_rate": 4.6050000000000003e-07, "loss": 0.3965, "step": 925 }, { "epoch": 0.01, "grad_norm": 25.564226150512695, "learning_rate": 4.7300000000000007e-07, "loss": 0.6743, "step": 950 }, { "epoch": 0.01, "grad_norm": 15.104978561401367, "learning_rate": 4.855e-07, "loss": 0.3803, "step": 975 }, { "epoch": 0.01, "grad_norm": 31.048397064208984, "learning_rate": 4.98e-07, "loss": 0.7105, "step": 1000 }, { "epoch": 0.01, "grad_norm": 16.98179817199707, "learning_rate": 5.105e-07, "loss": 0.391, "step": 1025 }, { "epoch": 0.01, "grad_norm": 25.55710220336914, "learning_rate": 5.23e-07, "loss": 0.7884, "step": 1050 }, { "epoch": 0.01, "grad_norm": 20.981952667236328, "learning_rate": 5.355e-07, "loss": 0.4967, "step": 1075 }, { "epoch": 0.01, "grad_norm": 30.654111862182617, "learning_rate": 5.480000000000001e-07, "loss": 0.753, "step": 1100 }, { "epoch": 0.01, "grad_norm": 16.536741256713867, "learning_rate": 5.605000000000001e-07, "loss": 0.3492, "step": 1125 }, { "epoch": 0.01, "grad_norm": 28.932754516601562, "learning_rate": 5.730000000000001e-07, "loss": 0.6753, "step": 1150 }, { "epoch": 0.01, "grad_norm": 21.23940086364746, "learning_rate": 5.855e-07, "loss": 0.4202, "step": 1175 }, { "epoch": 0.01, "grad_norm": 27.76628303527832, "learning_rate": 5.98e-07, "loss": 0.7374, "step": 1200 }, { "epoch": 0.01, "grad_norm": 21.627805709838867, "learning_rate": 6.105e-07, "loss": 0.3567, "step": 1225 }, { "epoch": 0.01, "grad_norm": 24.64447593688965, "learning_rate": 6.230000000000001e-07, "loss": 0.6981, "step": 1250 }, { "epoch": 0.01, "grad_norm": 14.282390594482422, "learning_rate": 6.355e-07, "loss": 0.3858, "step": 1275 }, { "epoch": 0.01, "grad_norm": 33.29556655883789, "learning_rate": 6.48e-07, "loss": 0.7272, "step": 1300 }, { "epoch": 0.01, "grad_norm": 14.240214347839355, "learning_rate": 6.605000000000001e-07, "loss": 0.3993, "step": 1325 }, { "epoch": 0.01, "grad_norm": 27.55805015563965, "learning_rate": 6.730000000000001e-07, "loss": 0.7736, "step": 1350 }, { "epoch": 0.01, "grad_norm": 10.175492286682129, "learning_rate": 6.855e-07, "loss": 0.3407, "step": 1375 }, { "epoch": 0.01, "grad_norm": 24.985837936401367, "learning_rate": 6.98e-07, "loss": 0.7635, "step": 1400 }, { "epoch": 0.01, "grad_norm": 10.972410202026367, "learning_rate": 7.105000000000001e-07, "loss": 0.3285, "step": 1425 }, { "epoch": 0.01, "grad_norm": 21.68819236755371, "learning_rate": 7.230000000000001e-07, "loss": 0.7334, "step": 1450 }, { "epoch": 0.01, "grad_norm": 15.809149742126465, "learning_rate": 7.355000000000001e-07, "loss": 0.3628, "step": 1475 }, { "epoch": 0.01, "grad_norm": 22.47153663635254, "learning_rate": 7.480000000000001e-07, "loss": 0.6661, "step": 1500 }, { "epoch": 0.01, "grad_norm": 16.649110794067383, "learning_rate": 7.605000000000002e-07, "loss": 0.3023, "step": 1525 }, { "epoch": 0.02, "grad_norm": 23.583951950073242, "learning_rate": 7.73e-07, "loss": 0.6486, "step": 1550 }, { "epoch": 0.02, "grad_norm": 16.382156372070312, "learning_rate": 7.855e-07, "loss": 0.3842, "step": 1575 }, { "epoch": 0.02, "grad_norm": 26.790008544921875, "learning_rate": 7.98e-07, "loss": 0.6857, "step": 1600 }, { "epoch": 0.02, "grad_norm": 22.074182510375977, "learning_rate": 8.105e-07, "loss": 0.3223, "step": 1625 }, { "epoch": 0.02, "grad_norm": 19.876205444335938, "learning_rate": 8.23e-07, "loss": 0.715, "step": 1650 }, { "epoch": 0.02, "grad_norm": 10.254929542541504, "learning_rate": 8.355000000000001e-07, "loss": 0.3587, "step": 1675 }, { "epoch": 0.02, "grad_norm": 28.97859764099121, "learning_rate": 8.480000000000001e-07, "loss": 0.7055, "step": 1700 }, { "epoch": 0.02, "grad_norm": 14.412611961364746, "learning_rate": 8.605000000000001e-07, "loss": 0.3328, "step": 1725 }, { "epoch": 0.02, "grad_norm": 22.884933471679688, "learning_rate": 8.73e-07, "loss": 0.6504, "step": 1750 }, { "epoch": 0.02, "grad_norm": 18.31490707397461, "learning_rate": 8.855000000000001e-07, "loss": 0.3612, "step": 1775 }, { "epoch": 0.02, "grad_norm": 22.611190795898438, "learning_rate": 8.980000000000001e-07, "loss": 0.5558, "step": 1800 }, { "epoch": 0.02, "grad_norm": 12.549702644348145, "learning_rate": 9.105000000000001e-07, "loss": 0.3424, "step": 1825 }, { "epoch": 0.02, "grad_norm": 21.657196044921875, "learning_rate": 9.23e-07, "loss": 0.6675, "step": 1850 }, { "epoch": 0.02, "grad_norm": 16.13555335998535, "learning_rate": 9.355e-07, "loss": 0.3214, "step": 1875 }, { "epoch": 0.02, "grad_norm": 25.21604347229004, "learning_rate": 9.480000000000001e-07, "loss": 0.6414, "step": 1900 }, { "epoch": 0.02, "grad_norm": 10.80751895904541, "learning_rate": 9.605e-07, "loss": 0.3123, "step": 1925 }, { "epoch": 0.02, "grad_norm": 33.81224060058594, "learning_rate": 9.73e-07, "loss": 0.6907, "step": 1950 }, { "epoch": 0.02, "grad_norm": 14.145767211914062, "learning_rate": 9.855000000000001e-07, "loss": 0.2827, "step": 1975 }, { "epoch": 0.02, "grad_norm": 26.546306610107422, "learning_rate": 9.98e-07, "loss": 0.6835, "step": 2000 }, { "epoch": 0.02, "grad_norm": 13.040128707885742, "learning_rate": 1.0105000000000001e-06, "loss": 0.3525, "step": 2025 }, { "epoch": 0.02, "grad_norm": 28.275651931762695, "learning_rate": 1.0230000000000002e-06, "loss": 0.6746, "step": 2050 }, { "epoch": 0.02, "grad_norm": 11.18452262878418, "learning_rate": 1.0355e-06, "loss": 0.3092, "step": 2075 }, { "epoch": 0.02, "grad_norm": 23.599761962890625, "learning_rate": 1.0480000000000002e-06, "loss": 0.6259, "step": 2100 }, { "epoch": 0.02, "grad_norm": 10.203682899475098, "learning_rate": 1.0605e-06, "loss": 0.34, "step": 2125 }, { "epoch": 0.02, "grad_norm": 22.647369384765625, "learning_rate": 1.0725000000000001e-06, "loss": 0.6313, "step": 2150 }, { "epoch": 0.02, "grad_norm": 17.094491958618164, "learning_rate": 1.085e-06, "loss": 0.3444, "step": 2175 }, { "epoch": 0.02, "grad_norm": 26.861534118652344, "learning_rate": 1.0975e-06, "loss": 0.5332, "step": 2200 }, { "epoch": 0.02, "grad_norm": 14.720125198364258, "learning_rate": 1.1100000000000002e-06, "loss": 0.3058, "step": 2225 }, { "epoch": 0.02, "grad_norm": 26.136003494262695, "learning_rate": 1.1225e-06, "loss": 0.6226, "step": 2250 }, { "epoch": 0.02, "grad_norm": 8.917914390563965, "learning_rate": 1.1350000000000001e-06, "loss": 0.2846, "step": 2275 }, { "epoch": 0.02, "grad_norm": 14.714705467224121, "learning_rate": 1.1475000000000002e-06, "loss": 0.5922, "step": 2300 }, { "epoch": 0.02, "grad_norm": 14.2655668258667, "learning_rate": 1.1600000000000001e-06, "loss": 0.3457, "step": 2325 }, { "epoch": 0.02, "grad_norm": 18.780527114868164, "learning_rate": 1.1725e-06, "loss": 0.616, "step": 2350 }, { "epoch": 0.02, "grad_norm": 16.51603889465332, "learning_rate": 1.185e-06, "loss": 0.3679, "step": 2375 }, { "epoch": 0.02, "grad_norm": 20.098976135253906, "learning_rate": 1.1975e-06, "loss": 0.5574, "step": 2400 }, { "epoch": 0.02, "grad_norm": 16.542795181274414, "learning_rate": 1.21e-06, "loss": 0.3355, "step": 2425 }, { "epoch": 0.02, "grad_norm": 25.48407554626465, "learning_rate": 1.2225000000000002e-06, "loss": 0.6351, "step": 2450 }, { "epoch": 0.02, "grad_norm": 14.543630599975586, "learning_rate": 1.235e-06, "loss": 0.307, "step": 2475 }, { "epoch": 0.02, "grad_norm": 21.76466941833496, "learning_rate": 1.2475000000000001e-06, "loss": 0.5518, "step": 2500 }, { "epoch": 0.02, "grad_norm": 7.915212154388428, "learning_rate": 1.26e-06, "loss": 0.2833, "step": 2525 }, { "epoch": 0.03, "grad_norm": 19.45631980895996, "learning_rate": 1.2725e-06, "loss": 0.6022, "step": 2550 }, { "epoch": 0.03, "grad_norm": 17.4409236907959, "learning_rate": 1.2850000000000002e-06, "loss": 0.3027, "step": 2575 }, { "epoch": 0.03, "grad_norm": 24.957618713378906, "learning_rate": 1.2975e-06, "loss": 0.5826, "step": 2600 }, { "epoch": 0.03, "grad_norm": 15.176064491271973, "learning_rate": 1.3100000000000002e-06, "loss": 0.313, "step": 2625 }, { "epoch": 0.03, "grad_norm": 23.41546058654785, "learning_rate": 1.3225000000000003e-06, "loss": 0.591, "step": 2650 }, { "epoch": 0.03, "grad_norm": 9.091719627380371, "learning_rate": 1.3350000000000001e-06, "loss": 0.3427, "step": 2675 }, { "epoch": 0.03, "grad_norm": 20.14720344543457, "learning_rate": 1.3475000000000002e-06, "loss": 0.569, "step": 2700 }, { "epoch": 0.03, "grad_norm": 18.62778663635254, "learning_rate": 1.3600000000000001e-06, "loss": 0.3465, "step": 2725 }, { "epoch": 0.03, "grad_norm": 17.653825759887695, "learning_rate": 1.3725000000000002e-06, "loss": 0.5879, "step": 2750 }, { "epoch": 0.03, "grad_norm": 14.82190227508545, "learning_rate": 1.3850000000000003e-06, "loss": 0.3052, "step": 2775 }, { "epoch": 0.03, "grad_norm": 24.45807456970215, "learning_rate": 1.3975000000000002e-06, "loss": 0.5812, "step": 2800 }, { "epoch": 0.03, "grad_norm": 14.287732124328613, "learning_rate": 1.41e-06, "loss": 0.3097, "step": 2825 }, { "epoch": 0.03, "grad_norm": 26.574039459228516, "learning_rate": 1.4225e-06, "loss": 0.5912, "step": 2850 }, { "epoch": 0.03, "grad_norm": 17.252702713012695, "learning_rate": 1.435e-06, "loss": 0.2763, "step": 2875 }, { "epoch": 0.03, "grad_norm": 22.374771118164062, "learning_rate": 1.4475000000000001e-06, "loss": 0.6004, "step": 2900 }, { "epoch": 0.03, "grad_norm": 11.585715293884277, "learning_rate": 1.46e-06, "loss": 0.3582, "step": 2925 }, { "epoch": 0.03, "grad_norm": 27.637187957763672, "learning_rate": 1.4725e-06, "loss": 0.6057, "step": 2950 }, { "epoch": 0.03, "grad_norm": 14.491903305053711, "learning_rate": 1.485e-06, "loss": 0.3343, "step": 2975 }, { "epoch": 0.03, "grad_norm": 23.771865844726562, "learning_rate": 1.4975e-06, "loss": 0.6582, "step": 3000 }, { "epoch": 0.03, "grad_norm": 14.844294548034668, "learning_rate": 1.5100000000000002e-06, "loss": 0.2911, "step": 3025 }, { "epoch": 0.03, "grad_norm": 31.02235221862793, "learning_rate": 1.5225e-06, "loss": 0.6252, "step": 3050 }, { "epoch": 0.03, "grad_norm": 11.044919967651367, "learning_rate": 1.5350000000000001e-06, "loss": 0.2953, "step": 3075 }, { "epoch": 0.03, "grad_norm": 22.002580642700195, "learning_rate": 1.5475000000000002e-06, "loss": 0.5104, "step": 3100 }, { "epoch": 0.03, "grad_norm": 15.219182968139648, "learning_rate": 1.56e-06, "loss": 0.3013, "step": 3125 }, { "epoch": 0.03, "grad_norm": 27.542530059814453, "learning_rate": 1.5725000000000002e-06, "loss": 0.5465, "step": 3150 }, { "epoch": 0.03, "grad_norm": 7.492985248565674, "learning_rate": 1.585e-06, "loss": 0.2424, "step": 3175 }, { "epoch": 0.03, "grad_norm": 24.621978759765625, "learning_rate": 1.5975000000000002e-06, "loss": 0.55, "step": 3200 }, { "epoch": 0.03, "grad_norm": 20.63075828552246, "learning_rate": 1.6100000000000003e-06, "loss": 0.3201, "step": 3225 }, { "epoch": 0.03, "grad_norm": 24.4765567779541, "learning_rate": 1.6225000000000001e-06, "loss": 0.6011, "step": 3250 }, { "epoch": 0.03, "grad_norm": 10.803881645202637, "learning_rate": 1.6350000000000002e-06, "loss": 0.3221, "step": 3275 }, { "epoch": 0.03, "grad_norm": 26.37066650390625, "learning_rate": 1.6475000000000001e-06, "loss": 0.5889, "step": 3300 }, { "epoch": 0.03, "grad_norm": 12.46799373626709, "learning_rate": 1.6600000000000002e-06, "loss": 0.2885, "step": 3325 }, { "epoch": 0.03, "grad_norm": 25.41158676147461, "learning_rate": 1.6725000000000003e-06, "loss": 0.5067, "step": 3350 }, { "epoch": 0.03, "grad_norm": 15.126263618469238, "learning_rate": 1.6850000000000002e-06, "loss": 0.2835, "step": 3375 }, { "epoch": 0.03, "grad_norm": 23.599336624145508, "learning_rate": 1.6975000000000003e-06, "loss": 0.5504, "step": 3400 }, { "epoch": 0.03, "grad_norm": 13.102058410644531, "learning_rate": 1.7100000000000004e-06, "loss": 0.3093, "step": 3425 }, { "epoch": 0.03, "grad_norm": 18.322994232177734, "learning_rate": 1.7225e-06, "loss": 0.4564, "step": 3450 }, { "epoch": 0.03, "grad_norm": 14.70544719696045, "learning_rate": 1.7350000000000001e-06, "loss": 0.2964, "step": 3475 }, { "epoch": 0.03, "grad_norm": 27.497079849243164, "learning_rate": 1.7475e-06, "loss": 0.5997, "step": 3500 }, { "epoch": 0.03, "grad_norm": 12.483077049255371, "learning_rate": 1.76e-06, "loss": 0.2973, "step": 3525 }, { "epoch": 0.03, "grad_norm": 31.631587982177734, "learning_rate": 1.7725e-06, "loss": 0.6141, "step": 3550 }, { "epoch": 0.04, "grad_norm": 8.688480377197266, "learning_rate": 1.785e-06, "loss": 0.2738, "step": 3575 }, { "epoch": 0.04, "grad_norm": 28.020715713500977, "learning_rate": 1.7975000000000002e-06, "loss": 0.5169, "step": 3600 }, { "epoch": 0.04, "grad_norm": 10.676518440246582, "learning_rate": 1.81e-06, "loss": 0.2703, "step": 3625 }, { "epoch": 0.04, "grad_norm": 27.905027389526367, "learning_rate": 1.8225000000000001e-06, "loss": 0.6198, "step": 3650 }, { "epoch": 0.04, "grad_norm": 10.84447193145752, "learning_rate": 1.8350000000000002e-06, "loss": 0.2583, "step": 3675 }, { "epoch": 0.04, "grad_norm": 24.488121032714844, "learning_rate": 1.8475e-06, "loss": 0.613, "step": 3700 }, { "epoch": 0.04, "grad_norm": 14.173299789428711, "learning_rate": 1.8600000000000002e-06, "loss": 0.301, "step": 3725 }, { "epoch": 0.04, "grad_norm": 23.557649612426758, "learning_rate": 1.8725e-06, "loss": 0.6042, "step": 3750 }, { "epoch": 0.04, "grad_norm": 12.60310173034668, "learning_rate": 1.8850000000000002e-06, "loss": 0.3116, "step": 3775 }, { "epoch": 0.04, "grad_norm": 31.24827766418457, "learning_rate": 1.8975000000000003e-06, "loss": 0.6194, "step": 3800 }, { "epoch": 0.04, "grad_norm": 18.685054779052734, "learning_rate": 1.9100000000000003e-06, "loss": 0.2738, "step": 3825 }, { "epoch": 0.04, "grad_norm": 17.56845474243164, "learning_rate": 1.9225000000000002e-06, "loss": 0.6504, "step": 3850 }, { "epoch": 0.04, "grad_norm": 15.701684951782227, "learning_rate": 1.935e-06, "loss": 0.2764, "step": 3875 }, { "epoch": 0.04, "grad_norm": 27.479557037353516, "learning_rate": 1.9475000000000004e-06, "loss": 0.5998, "step": 3900 }, { "epoch": 0.04, "grad_norm": 17.550142288208008, "learning_rate": 1.9600000000000003e-06, "loss": 0.3297, "step": 3925 }, { "epoch": 0.04, "grad_norm": 31.376115798950195, "learning_rate": 1.9725e-06, "loss": 0.5666, "step": 3950 }, { "epoch": 0.04, "grad_norm": 13.57155990600586, "learning_rate": 1.985e-06, "loss": 0.3572, "step": 3975 }, { "epoch": 0.04, "grad_norm": 20.4503173828125, "learning_rate": 1.9975000000000004e-06, "loss": 0.5922, "step": 4000 }, { "epoch": 0.04, "grad_norm": 14.024149894714355, "learning_rate": 2.0100000000000002e-06, "loss": 0.3656, "step": 4025 }, { "epoch": 0.04, "grad_norm": 19.237850189208984, "learning_rate": 2.0225e-06, "loss": 0.5655, "step": 4050 }, { "epoch": 0.04, "grad_norm": 11.273808479309082, "learning_rate": 2.035e-06, "loss": 0.2948, "step": 4075 }, { "epoch": 0.04, "grad_norm": 24.21982765197754, "learning_rate": 2.0475e-06, "loss": 0.5984, "step": 4100 }, { "epoch": 0.04, "grad_norm": 17.36025047302246, "learning_rate": 2.06e-06, "loss": 0.2547, "step": 4125 }, { "epoch": 0.04, "grad_norm": 25.583646774291992, "learning_rate": 2.0725e-06, "loss": 0.5384, "step": 4150 }, { "epoch": 0.04, "grad_norm": 12.437751770019531, "learning_rate": 2.085e-06, "loss": 0.2274, "step": 4175 }, { "epoch": 0.04, "grad_norm": 52.4139518737793, "learning_rate": 2.0975000000000002e-06, "loss": 0.5682, "step": 4200 }, { "epoch": 0.04, "grad_norm": 11.61032485961914, "learning_rate": 2.11e-06, "loss": 0.3209, "step": 4225 }, { "epoch": 0.04, "grad_norm": 25.858030319213867, "learning_rate": 2.1225e-06, "loss": 0.5277, "step": 4250 }, { "epoch": 0.04, "grad_norm": 10.285331726074219, "learning_rate": 2.1350000000000003e-06, "loss": 0.3148, "step": 4275 }, { "epoch": 0.04, "grad_norm": 28.985692977905273, "learning_rate": 2.1475e-06, "loss": 0.539, "step": 4300 }, { "epoch": 0.04, "grad_norm": 15.672455787658691, "learning_rate": 2.16e-06, "loss": 0.2804, "step": 4325 }, { "epoch": 0.04, "grad_norm": 28.33833122253418, "learning_rate": 2.1725000000000004e-06, "loss": 0.5156, "step": 4350 }, { "epoch": 0.04, "grad_norm": 7.049198627471924, "learning_rate": 2.1850000000000003e-06, "loss": 0.2227, "step": 4375 }, { "epoch": 0.04, "grad_norm": 26.713266372680664, "learning_rate": 2.1970000000000003e-06, "loss": 0.5751, "step": 4400 }, { "epoch": 0.04, "grad_norm": 19.074817657470703, "learning_rate": 2.2095e-06, "loss": 0.282, "step": 4425 }, { "epoch": 0.04, "grad_norm": 29.829111099243164, "learning_rate": 2.222e-06, "loss": 0.5214, "step": 4450 }, { "epoch": 0.04, "grad_norm": 11.166728019714355, "learning_rate": 2.2345000000000004e-06, "loss": 0.2416, "step": 4475 }, { "epoch": 0.04, "grad_norm": 23.915325164794922, "learning_rate": 2.2470000000000003e-06, "loss": 0.4988, "step": 4500 }, { "epoch": 0.04, "grad_norm": 12.559374809265137, "learning_rate": 2.2595e-06, "loss": 0.277, "step": 4525 }, { "epoch": 0.04, "grad_norm": 18.104415893554688, "learning_rate": 2.2720000000000004e-06, "loss": 0.5388, "step": 4550 }, { "epoch": 0.04, "grad_norm": 3.8616342544555664, "learning_rate": 2.2845e-06, "loss": 0.2794, "step": 4575 }, { "epoch": 0.05, "grad_norm": 21.544191360473633, "learning_rate": 2.297e-06, "loss": 0.537, "step": 4600 }, { "epoch": 0.05, "grad_norm": 13.912849426269531, "learning_rate": 2.3095e-06, "loss": 0.2573, "step": 4625 }, { "epoch": 0.05, "grad_norm": 21.982852935791016, "learning_rate": 2.322e-06, "loss": 0.4929, "step": 4650 }, { "epoch": 0.05, "grad_norm": 16.866371154785156, "learning_rate": 2.3345000000000003e-06, "loss": 0.2966, "step": 4675 }, { "epoch": 0.05, "grad_norm": 28.57170867919922, "learning_rate": 2.347e-06, "loss": 0.5768, "step": 4700 }, { "epoch": 0.05, "grad_norm": 8.362975120544434, "learning_rate": 2.3595e-06, "loss": 0.3112, "step": 4725 }, { "epoch": 0.05, "grad_norm": 13.742633819580078, "learning_rate": 2.3720000000000003e-06, "loss": 0.5608, "step": 4750 }, { "epoch": 0.05, "grad_norm": 16.084518432617188, "learning_rate": 2.3845e-06, "loss": 0.2565, "step": 4775 }, { "epoch": 0.05, "grad_norm": 19.2742919921875, "learning_rate": 2.397e-06, "loss": 0.491, "step": 4800 }, { "epoch": 0.05, "grad_norm": 23.673070907592773, "learning_rate": 2.4095e-06, "loss": 0.3238, "step": 4825 }, { "epoch": 0.05, "grad_norm": 15.477209091186523, "learning_rate": 2.4220000000000003e-06, "loss": 0.5351, "step": 4850 }, { "epoch": 0.05, "grad_norm": 19.833494186401367, "learning_rate": 2.4345e-06, "loss": 0.3134, "step": 4875 }, { "epoch": 0.05, "grad_norm": 20.03614044189453, "learning_rate": 2.447e-06, "loss": 0.5886, "step": 4900 }, { "epoch": 0.05, "grad_norm": 16.656845092773438, "learning_rate": 2.4595000000000003e-06, "loss": 0.2986, "step": 4925 }, { "epoch": 0.05, "grad_norm": 24.008588790893555, "learning_rate": 2.4720000000000002e-06, "loss": 0.5494, "step": 4950 }, { "epoch": 0.05, "grad_norm": 16.217710494995117, "learning_rate": 2.4845e-06, "loss": 0.2816, "step": 4975 }, { "epoch": 0.05, "grad_norm": 17.38448143005371, "learning_rate": 2.4970000000000004e-06, "loss": 0.5748, "step": 5000 }, { "epoch": 0.05, "grad_norm": 13.786866188049316, "learning_rate": 2.5095000000000003e-06, "loss": 0.2606, "step": 5025 }, { "epoch": 0.05, "grad_norm": 32.112998962402344, "learning_rate": 2.522e-06, "loss": 0.6032, "step": 5050 }, { "epoch": 0.05, "grad_norm": 10.892425537109375, "learning_rate": 2.5345000000000005e-06, "loss": 0.2526, "step": 5075 }, { "epoch": 0.05, "grad_norm": 19.03778076171875, "learning_rate": 2.547e-06, "loss": 0.6328, "step": 5100 }, { "epoch": 0.05, "grad_norm": 13.244776725769043, "learning_rate": 2.5595000000000002e-06, "loss": 0.2864, "step": 5125 }, { "epoch": 0.05, "grad_norm": 21.49582290649414, "learning_rate": 2.572e-06, "loss": 0.5377, "step": 5150 }, { "epoch": 0.05, "grad_norm": 15.646612167358398, "learning_rate": 2.5845000000000004e-06, "loss": 0.2578, "step": 5175 }, { "epoch": 0.05, "grad_norm": 14.2152738571167, "learning_rate": 2.597e-06, "loss": 0.5688, "step": 5200 }, { "epoch": 0.05, "grad_norm": 16.998485565185547, "learning_rate": 2.6095e-06, "loss": 0.2537, "step": 5225 }, { "epoch": 0.05, "grad_norm": 21.870323181152344, "learning_rate": 2.622e-06, "loss": 0.6119, "step": 5250 }, { "epoch": 0.05, "grad_norm": 16.72423553466797, "learning_rate": 2.6345000000000004e-06, "loss": 0.3304, "step": 5275 }, { "epoch": 0.05, "grad_norm": 26.161388397216797, "learning_rate": 2.6470000000000002e-06, "loss": 0.5492, "step": 5300 }, { "epoch": 0.05, "grad_norm": 18.9041748046875, "learning_rate": 2.6595000000000005e-06, "loss": 0.3153, "step": 5325 }, { "epoch": 0.05, "grad_norm": 25.107742309570312, "learning_rate": 2.672e-06, "loss": 0.5772, "step": 5350 }, { "epoch": 0.05, "grad_norm": 15.983378410339355, "learning_rate": 2.6845000000000003e-06, "loss": 0.2813, "step": 5375 }, { "epoch": 0.05, "grad_norm": 19.220504760742188, "learning_rate": 2.697e-06, "loss": 0.5865, "step": 5400 }, { "epoch": 0.05, "grad_norm": 7.650417327880859, "learning_rate": 2.7095000000000005e-06, "loss": 0.228, "step": 5425 }, { "epoch": 0.05, "grad_norm": 28.652082443237305, "learning_rate": 2.7220000000000004e-06, "loss": 0.5349, "step": 5450 }, { "epoch": 0.05, "grad_norm": 14.965731620788574, "learning_rate": 2.7345000000000007e-06, "loss": 0.2319, "step": 5475 }, { "epoch": 0.05, "grad_norm": 26.87473487854004, "learning_rate": 2.747e-06, "loss": 0.5222, "step": 5500 }, { "epoch": 0.05, "grad_norm": 12.213537216186523, "learning_rate": 2.7595e-06, "loss": 0.2461, "step": 5525 }, { "epoch": 0.05, "grad_norm": 27.712162017822266, "learning_rate": 2.7720000000000003e-06, "loss": 0.5624, "step": 5550 }, { "epoch": 0.05, "grad_norm": 9.652873039245605, "learning_rate": 2.7845e-06, "loss": 0.343, "step": 5575 }, { "epoch": 0.06, "grad_norm": 23.97194480895996, "learning_rate": 2.797e-06, "loss": 0.5676, "step": 5600 }, { "epoch": 0.06, "grad_norm": 17.642398834228516, "learning_rate": 2.8095e-06, "loss": 0.2577, "step": 5625 }, { "epoch": 0.06, "grad_norm": 31.620677947998047, "learning_rate": 2.8220000000000003e-06, "loss": 0.573, "step": 5650 }, { "epoch": 0.06, "grad_norm": 14.306438446044922, "learning_rate": 2.8345e-06, "loss": 0.2748, "step": 5675 }, { "epoch": 0.06, "grad_norm": 25.493270874023438, "learning_rate": 2.8470000000000004e-06, "loss": 0.6209, "step": 5700 }, { "epoch": 0.06, "grad_norm": 12.359241485595703, "learning_rate": 2.8595e-06, "loss": 0.2708, "step": 5725 }, { "epoch": 0.06, "grad_norm": 23.575340270996094, "learning_rate": 2.872e-06, "loss": 0.4745, "step": 5750 }, { "epoch": 0.06, "grad_norm": 14.759465217590332, "learning_rate": 2.8845e-06, "loss": 0.3048, "step": 5775 }, { "epoch": 0.06, "grad_norm": 21.302215576171875, "learning_rate": 2.8970000000000004e-06, "loss": 0.5926, "step": 5800 }, { "epoch": 0.06, "grad_norm": 18.573396682739258, "learning_rate": 2.9095000000000003e-06, "loss": 0.303, "step": 5825 }, { "epoch": 0.06, "grad_norm": 26.08246612548828, "learning_rate": 2.9220000000000006e-06, "loss": 0.5796, "step": 5850 }, { "epoch": 0.06, "grad_norm": 8.610541343688965, "learning_rate": 2.9345e-06, "loss": 0.2622, "step": 5875 }, { "epoch": 0.06, "grad_norm": 23.009159088134766, "learning_rate": 2.9470000000000003e-06, "loss": 0.471, "step": 5900 }, { "epoch": 0.06, "grad_norm": 8.611969947814941, "learning_rate": 2.9595e-06, "loss": 0.3149, "step": 5925 }, { "epoch": 0.06, "grad_norm": 22.354230880737305, "learning_rate": 2.9720000000000005e-06, "loss": 0.5575, "step": 5950 }, { "epoch": 0.06, "grad_norm": 9.6683988571167, "learning_rate": 2.9845e-06, "loss": 0.2598, "step": 5975 }, { "epoch": 0.06, "grad_norm": 18.905555725097656, "learning_rate": 2.9970000000000003e-06, "loss": 0.4889, "step": 6000 }, { "epoch": 0.06, "grad_norm": 12.046964645385742, "learning_rate": 3.0095e-06, "loss": 0.256, "step": 6025 }, { "epoch": 0.06, "grad_norm": 26.902498245239258, "learning_rate": 3.0220000000000005e-06, "loss": 0.504, "step": 6050 }, { "epoch": 0.06, "grad_norm": 11.430900573730469, "learning_rate": 3.0345000000000003e-06, "loss": 0.3349, "step": 6075 }, { "epoch": 0.06, "grad_norm": 18.552717208862305, "learning_rate": 3.0470000000000006e-06, "loss": 0.5683, "step": 6100 }, { "epoch": 0.06, "grad_norm": 14.703068733215332, "learning_rate": 3.0595e-06, "loss": 0.2842, "step": 6125 }, { "epoch": 0.06, "grad_norm": 20.6137638092041, "learning_rate": 3.072e-06, "loss": 0.5473, "step": 6150 }, { "epoch": 0.06, "grad_norm": 16.283918380737305, "learning_rate": 3.0845000000000003e-06, "loss": 0.3172, "step": 6175 }, { "epoch": 0.06, "grad_norm": 18.256755828857422, "learning_rate": 3.097e-06, "loss": 0.5073, "step": 6200 }, { "epoch": 0.06, "grad_norm": 10.466363906860352, "learning_rate": 3.1095000000000005e-06, "loss": 0.2985, "step": 6225 }, { "epoch": 0.06, "grad_norm": 20.872129440307617, "learning_rate": 3.122e-06, "loss": 0.5526, "step": 6250 }, { "epoch": 0.06, "grad_norm": 6.591434955596924, "learning_rate": 3.1345000000000002e-06, "loss": 0.2617, "step": 6275 }, { "epoch": 0.06, "grad_norm": 46.64836883544922, "learning_rate": 3.147e-06, "loss": 0.5101, "step": 6300 }, { "epoch": 0.06, "grad_norm": 12.374617576599121, "learning_rate": 3.1595000000000004e-06, "loss": 0.2725, "step": 6325 }, { "epoch": 0.06, "grad_norm": 28.696699142456055, "learning_rate": 3.172e-06, "loss": 0.475, "step": 6350 }, { "epoch": 0.06, "grad_norm": 13.087571144104004, "learning_rate": 3.1845e-06, "loss": 0.2727, "step": 6375 }, { "epoch": 0.06, "grad_norm": 29.651599884033203, "learning_rate": 3.197e-06, "loss": 0.523, "step": 6400 }, { "epoch": 0.06, "grad_norm": 8.625425338745117, "learning_rate": 3.2095000000000004e-06, "loss": 0.2522, "step": 6425 }, { "epoch": 0.06, "grad_norm": 25.1377010345459, "learning_rate": 3.2220000000000002e-06, "loss": 0.5298, "step": 6450 }, { "epoch": 0.06, "grad_norm": 8.436907768249512, "learning_rate": 3.2345000000000005e-06, "loss": 0.2601, "step": 6475 }, { "epoch": 0.06, "grad_norm": 27.34095001220703, "learning_rate": 3.247e-06, "loss": 0.529, "step": 6500 }, { "epoch": 0.06, "grad_norm": 16.335025787353516, "learning_rate": 3.2595000000000003e-06, "loss": 0.2771, "step": 6525 }, { "epoch": 0.06, "grad_norm": 14.815462112426758, "learning_rate": 3.272e-06, "loss": 0.6163, "step": 6550 }, { "epoch": 0.06, "grad_norm": 16.148479461669922, "learning_rate": 3.2845000000000005e-06, "loss": 0.3058, "step": 6575 }, { "epoch": 0.06, "grad_norm": 22.773292541503906, "learning_rate": 3.2970000000000004e-06, "loss": 0.5148, "step": 6600 }, { "epoch": 0.07, "grad_norm": 16.447750091552734, "learning_rate": 3.3095000000000007e-06, "loss": 0.2889, "step": 6625 }, { "epoch": 0.07, "grad_norm": 26.679561614990234, "learning_rate": 3.322e-06, "loss": 0.4984, "step": 6650 }, { "epoch": 0.07, "grad_norm": 14.75298023223877, "learning_rate": 3.3345000000000004e-06, "loss": 0.2873, "step": 6675 }, { "epoch": 0.07, "grad_norm": 16.985498428344727, "learning_rate": 3.3470000000000003e-06, "loss": 0.5391, "step": 6700 }, { "epoch": 0.07, "grad_norm": 12.544781684875488, "learning_rate": 3.3595000000000006e-06, "loss": 0.2779, "step": 6725 }, { "epoch": 0.07, "grad_norm": 21.501564025878906, "learning_rate": 3.372e-06, "loss": 0.6059, "step": 6750 }, { "epoch": 0.07, "grad_norm": 14.099150657653809, "learning_rate": 3.3845e-06, "loss": 0.2809, "step": 6775 }, { "epoch": 0.07, "grad_norm": 30.844390869140625, "learning_rate": 3.3970000000000003e-06, "loss": 0.5161, "step": 6800 }, { "epoch": 0.07, "grad_norm": 17.44480323791504, "learning_rate": 3.4095e-06, "loss": 0.3222, "step": 6825 }, { "epoch": 0.07, "grad_norm": 29.42108154296875, "learning_rate": 3.4220000000000004e-06, "loss": 0.5356, "step": 6850 }, { "epoch": 0.07, "grad_norm": 14.090635299682617, "learning_rate": 3.4345e-06, "loss": 0.3108, "step": 6875 }, { "epoch": 0.07, "grad_norm": 11.435125350952148, "learning_rate": 3.447e-06, "loss": 0.575, "step": 6900 }, { "epoch": 0.07, "grad_norm": 11.024394989013672, "learning_rate": 3.4595e-06, "loss": 0.2828, "step": 6925 }, { "epoch": 0.07, "grad_norm": 23.612144470214844, "learning_rate": 3.4720000000000004e-06, "loss": 0.4771, "step": 6950 }, { "epoch": 0.07, "grad_norm": 17.76152992248535, "learning_rate": 3.4845000000000003e-06, "loss": 0.2792, "step": 6975 }, { "epoch": 0.07, "grad_norm": 22.580734252929688, "learning_rate": 3.4970000000000006e-06, "loss": 0.5426, "step": 7000 }, { "epoch": 0.07, "grad_norm": 14.581759452819824, "learning_rate": 3.5095e-06, "loss": 0.2317, "step": 7025 }, { "epoch": 0.07, "grad_norm": 21.30860710144043, "learning_rate": 3.5220000000000003e-06, "loss": 0.5448, "step": 7050 }, { "epoch": 0.07, "grad_norm": 7.132657051086426, "learning_rate": 3.5345e-06, "loss": 0.3062, "step": 7075 }, { "epoch": 0.07, "grad_norm": 18.89944839477539, "learning_rate": 3.5470000000000005e-06, "loss": 0.503, "step": 7100 }, { "epoch": 0.07, "grad_norm": 15.117323875427246, "learning_rate": 3.5595e-06, "loss": 0.258, "step": 7125 }, { "epoch": 0.07, "grad_norm": 20.31907844543457, "learning_rate": 3.5720000000000003e-06, "loss": 0.4958, "step": 7150 }, { "epoch": 0.07, "grad_norm": 16.49303436279297, "learning_rate": 3.5845e-06, "loss": 0.278, "step": 7175 }, { "epoch": 0.07, "grad_norm": 20.11726188659668, "learning_rate": 3.5970000000000005e-06, "loss": 0.5739, "step": 7200 }, { "epoch": 0.07, "grad_norm": 10.279136657714844, "learning_rate": 3.6095000000000003e-06, "loss": 0.2979, "step": 7225 }, { "epoch": 0.07, "grad_norm": 25.968673706054688, "learning_rate": 3.6220000000000006e-06, "loss": 0.5494, "step": 7250 }, { "epoch": 0.07, "grad_norm": 13.679120063781738, "learning_rate": 3.6345e-06, "loss": 0.3184, "step": 7275 }, { "epoch": 0.07, "grad_norm": 19.92441749572754, "learning_rate": 3.6470000000000004e-06, "loss": 0.5449, "step": 7300 }, { "epoch": 0.07, "grad_norm": 11.890188217163086, "learning_rate": 3.6595000000000003e-06, "loss": 0.2407, "step": 7325 }, { "epoch": 0.07, "grad_norm": 15.573036193847656, "learning_rate": 3.6720000000000006e-06, "loss": 0.5481, "step": 7350 }, { "epoch": 0.07, "grad_norm": 15.6271333694458, "learning_rate": 3.6845000000000005e-06, "loss": 0.2888, "step": 7375 }, { "epoch": 0.07, "grad_norm": 25.30506706237793, "learning_rate": 3.697e-06, "loss": 0.5366, "step": 7400 }, { "epoch": 0.07, "grad_norm": 12.740799903869629, "learning_rate": 3.7095000000000002e-06, "loss": 0.3009, "step": 7425 }, { "epoch": 0.07, "grad_norm": 24.390710830688477, "learning_rate": 3.722e-06, "loss": 0.5153, "step": 7450 }, { "epoch": 0.07, "grad_norm": 12.136968612670898, "learning_rate": 3.7345000000000004e-06, "loss": 0.2922, "step": 7475 }, { "epoch": 0.07, "grad_norm": 30.467281341552734, "learning_rate": 3.7470000000000003e-06, "loss": 0.5903, "step": 7500 }, { "epoch": 0.07, "grad_norm": 6.967930793762207, "learning_rate": 3.7595e-06, "loss": 0.2405, "step": 7525 }, { "epoch": 0.07, "grad_norm": 18.92106819152832, "learning_rate": 3.772e-06, "loss": 0.5048, "step": 7550 }, { "epoch": 0.07, "grad_norm": 15.261474609375, "learning_rate": 3.7845000000000004e-06, "loss": 0.2857, "step": 7575 }, { "epoch": 0.07, "grad_norm": 27.138721466064453, "learning_rate": 3.7970000000000002e-06, "loss": 0.5426, "step": 7600 }, { "epoch": 0.07, "grad_norm": 14.487828254699707, "learning_rate": 3.8095000000000005e-06, "loss": 0.2875, "step": 7625 }, { "epoch": 0.08, "grad_norm": 17.192646026611328, "learning_rate": 3.822e-06, "loss": 0.5535, "step": 7650 }, { "epoch": 0.08, "grad_norm": 11.033638000488281, "learning_rate": 3.8345e-06, "loss": 0.2054, "step": 7675 }, { "epoch": 0.08, "grad_norm": 24.011253356933594, "learning_rate": 3.847e-06, "loss": 0.5617, "step": 7700 }, { "epoch": 0.08, "grad_norm": 14.284228324890137, "learning_rate": 3.8595e-06, "loss": 0.2689, "step": 7725 }, { "epoch": 0.08, "grad_norm": 47.69736862182617, "learning_rate": 3.872e-06, "loss": 0.5406, "step": 7750 }, { "epoch": 0.08, "grad_norm": 10.276515007019043, "learning_rate": 3.884500000000001e-06, "loss": 0.2644, "step": 7775 }, { "epoch": 0.08, "grad_norm": 19.28299331665039, "learning_rate": 3.897e-06, "loss": 0.438, "step": 7800 }, { "epoch": 0.08, "grad_norm": 11.86619758605957, "learning_rate": 3.9095000000000004e-06, "loss": 0.2589, "step": 7825 }, { "epoch": 0.08, "grad_norm": 24.30010223388672, "learning_rate": 3.922e-06, "loss": 0.5302, "step": 7850 }, { "epoch": 0.08, "grad_norm": 12.07744312286377, "learning_rate": 3.9345e-06, "loss": 0.23, "step": 7875 }, { "epoch": 0.08, "grad_norm": 22.55868148803711, "learning_rate": 3.9470000000000005e-06, "loss": 0.5428, "step": 7900 }, { "epoch": 0.08, "grad_norm": 15.163516998291016, "learning_rate": 3.959500000000001e-06, "loss": 0.2941, "step": 7925 }, { "epoch": 0.08, "grad_norm": 23.18950080871582, "learning_rate": 3.972e-06, "loss": 0.5816, "step": 7950 }, { "epoch": 0.08, "grad_norm": 7.0976667404174805, "learning_rate": 3.9845000000000006e-06, "loss": 0.2755, "step": 7975 }, { "epoch": 0.08, "grad_norm": 19.657718658447266, "learning_rate": 3.997e-06, "loss": 0.5245, "step": 8000 }, { "epoch": 0.08, "grad_norm": 11.001235008239746, "learning_rate": 4.0095e-06, "loss": 0.2247, "step": 8025 }, { "epoch": 0.08, "grad_norm": 22.4105224609375, "learning_rate": 4.022000000000001e-06, "loss": 0.5023, "step": 8050 }, { "epoch": 0.08, "grad_norm": 12.31091022491455, "learning_rate": 4.0345e-06, "loss": 0.2012, "step": 8075 }, { "epoch": 0.08, "grad_norm": 22.013017654418945, "learning_rate": 4.047e-06, "loss": 0.5185, "step": 8100 }, { "epoch": 0.08, "grad_norm": 8.91584587097168, "learning_rate": 4.0595e-06, "loss": 0.2631, "step": 8125 }, { "epoch": 0.08, "grad_norm": 31.5472412109375, "learning_rate": 4.072e-06, "loss": 0.5179, "step": 8150 }, { "epoch": 0.08, "grad_norm": 12.509997367858887, "learning_rate": 4.0845000000000004e-06, "loss": 0.3128, "step": 8175 }, { "epoch": 0.08, "grad_norm": 21.462047576904297, "learning_rate": 4.097000000000001e-06, "loss": 0.5674, "step": 8200 }, { "epoch": 0.08, "grad_norm": 12.810897827148438, "learning_rate": 4.1095e-06, "loss": 0.2701, "step": 8225 }, { "epoch": 0.08, "grad_norm": 20.359907150268555, "learning_rate": 4.1220000000000005e-06, "loss": 0.45, "step": 8250 }, { "epoch": 0.08, "grad_norm": 10.811437606811523, "learning_rate": 4.1345e-06, "loss": 0.2839, "step": 8275 }, { "epoch": 0.08, "grad_norm": 27.459075927734375, "learning_rate": 4.147e-06, "loss": 0.5704, "step": 8300 }, { "epoch": 0.08, "grad_norm": 7.3411970138549805, "learning_rate": 4.159500000000001e-06, "loss": 0.2809, "step": 8325 }, { "epoch": 0.08, "grad_norm": 23.438993453979492, "learning_rate": 4.172000000000001e-06, "loss": 0.5493, "step": 8350 }, { "epoch": 0.08, "grad_norm": 10.128694534301758, "learning_rate": 4.1845e-06, "loss": 0.255, "step": 8375 }, { "epoch": 0.08, "grad_norm": 25.745107650756836, "learning_rate": 4.1965e-06, "loss": 0.604, "step": 8400 }, { "epoch": 0.08, "grad_norm": 7.743176460266113, "learning_rate": 4.209000000000001e-06, "loss": 0.3086, "step": 8425 }, { "epoch": 0.08, "grad_norm": 18.59846305847168, "learning_rate": 4.221500000000001e-06, "loss": 0.5875, "step": 8450 }, { "epoch": 0.08, "grad_norm": 7.114607810974121, "learning_rate": 4.2340000000000005e-06, "loss": 0.3083, "step": 8475 }, { "epoch": 0.08, "grad_norm": 25.411457061767578, "learning_rate": 4.246500000000001e-06, "loss": 0.4891, "step": 8500 }, { "epoch": 0.08, "grad_norm": 11.201309204101562, "learning_rate": 4.259e-06, "loss": 0.2839, "step": 8525 }, { "epoch": 0.08, "grad_norm": 26.80746078491211, "learning_rate": 4.2715e-06, "loss": 0.5617, "step": 8550 }, { "epoch": 0.08, "grad_norm": 6.9401631355285645, "learning_rate": 4.284e-06, "loss": 0.2972, "step": 8575 }, { "epoch": 0.08, "grad_norm": 20.14504051208496, "learning_rate": 4.2965e-06, "loss": 0.5346, "step": 8600 }, { "epoch": 0.08, "grad_norm": 11.276283264160156, "learning_rate": 4.309000000000001e-06, "loss": 0.3023, "step": 8625 }, { "epoch": 0.09, "grad_norm": 17.781892776489258, "learning_rate": 4.3215e-06, "loss": 0.5617, "step": 8650 }, { "epoch": 0.09, "grad_norm": 13.588680267333984, "learning_rate": 4.334e-06, "loss": 0.2622, "step": 8675 }, { "epoch": 0.09, "grad_norm": 24.179786682128906, "learning_rate": 4.3465e-06, "loss": 0.5532, "step": 8700 }, { "epoch": 0.09, "grad_norm": 11.250155448913574, "learning_rate": 4.359e-06, "loss": 0.2731, "step": 8725 }, { "epoch": 0.09, "grad_norm": 15.135162353515625, "learning_rate": 4.3715e-06, "loss": 0.497, "step": 8750 }, { "epoch": 0.09, "grad_norm": 10.31822395324707, "learning_rate": 4.384000000000001e-06, "loss": 0.2437, "step": 8775 }, { "epoch": 0.09, "grad_norm": 30.385175704956055, "learning_rate": 4.3965e-06, "loss": 0.5371, "step": 8800 }, { "epoch": 0.09, "grad_norm": 12.726698875427246, "learning_rate": 4.4090000000000005e-06, "loss": 0.2183, "step": 8825 }, { "epoch": 0.09, "grad_norm": 22.664262771606445, "learning_rate": 4.4215e-06, "loss": 0.4968, "step": 8850 }, { "epoch": 0.09, "grad_norm": 15.585986137390137, "learning_rate": 4.434e-06, "loss": 0.2466, "step": 8875 }, { "epoch": 0.09, "grad_norm": 25.449874877929688, "learning_rate": 4.4465000000000005e-06, "loss": 0.6154, "step": 8900 }, { "epoch": 0.09, "grad_norm": 12.356121063232422, "learning_rate": 4.459000000000001e-06, "loss": 0.2829, "step": 8925 }, { "epoch": 0.09, "grad_norm": 18.43619728088379, "learning_rate": 4.4715e-06, "loss": 0.5538, "step": 8950 }, { "epoch": 0.09, "grad_norm": 9.308497428894043, "learning_rate": 4.484000000000001e-06, "loss": 0.2768, "step": 8975 }, { "epoch": 0.09, "grad_norm": Infinity, "learning_rate": 4.496000000000001e-06, "loss": 0.4914, "step": 9000 }, { "epoch": 0.09, "grad_norm": 18.44943618774414, "learning_rate": 4.5085e-06, "loss": 0.2747, "step": 9025 }, { "epoch": 0.09, "grad_norm": 29.388303756713867, "learning_rate": 4.521e-06, "loss": 0.5369, "step": 9050 }, { "epoch": 0.09, "grad_norm": 12.3926362991333, "learning_rate": 4.5335e-06, "loss": 0.2507, "step": 9075 }, { "epoch": 0.09, "grad_norm": 19.307252883911133, "learning_rate": 4.546e-06, "loss": 0.4967, "step": 9100 }, { "epoch": 0.09, "grad_norm": 12.92453384399414, "learning_rate": 4.5585000000000005e-06, "loss": 0.287, "step": 9125 }, { "epoch": 0.09, "grad_norm": 26.709110260009766, "learning_rate": 4.571000000000001e-06, "loss": 0.5763, "step": 9150 }, { "epoch": 0.09, "grad_norm": 15.24050521850586, "learning_rate": 4.5835e-06, "loss": 0.2409, "step": 9175 }, { "epoch": 0.09, "grad_norm": 21.145462036132812, "learning_rate": 4.5960000000000006e-06, "loss": 0.4986, "step": 9200 }, { "epoch": 0.09, "grad_norm": 7.725516319274902, "learning_rate": 4.6085e-06, "loss": 0.2354, "step": 9225 }, { "epoch": 0.09, "grad_norm": 25.198699951171875, "learning_rate": 4.621e-06, "loss": 0.5568, "step": 9250 }, { "epoch": 0.09, "grad_norm": 17.880329132080078, "learning_rate": 4.633500000000001e-06, "loss": 0.2843, "step": 9275 }, { "epoch": 0.09, "grad_norm": 21.43701934814453, "learning_rate": 4.646000000000001e-06, "loss": 0.5694, "step": 9300 }, { "epoch": 0.09, "grad_norm": 9.041929244995117, "learning_rate": 4.6585e-06, "loss": 0.2693, "step": 9325 }, { "epoch": 0.09, "grad_norm": 28.347461700439453, "learning_rate": 4.671000000000001e-06, "loss": 0.636, "step": 9350 }, { "epoch": 0.09, "grad_norm": 8.775546073913574, "learning_rate": 4.6835e-06, "loss": 0.2694, "step": 9375 }, { "epoch": 0.09, "grad_norm": 23.527692794799805, "learning_rate": 4.6960000000000004e-06, "loss": 0.5692, "step": 9400 }, { "epoch": 0.09, "grad_norm": 15.176011085510254, "learning_rate": 4.7085e-06, "loss": 0.2695, "step": 9425 }, { "epoch": 0.09, "grad_norm": 25.995019912719727, "learning_rate": 4.721e-06, "loss": 0.5779, "step": 9450 }, { "epoch": 0.09, "grad_norm": 19.048351287841797, "learning_rate": 4.7335000000000005e-06, "loss": 0.3027, "step": 9475 }, { "epoch": 0.09, "grad_norm": 16.325176239013672, "learning_rate": 4.746000000000001e-06, "loss": 0.4801, "step": 9500 }, { "epoch": 0.09, "grad_norm": 12.076348304748535, "learning_rate": 4.7585e-06, "loss": 0.2552, "step": 9525 }, { "epoch": 0.09, "grad_norm": 21.77154541015625, "learning_rate": 4.7710000000000006e-06, "loss": 0.6053, "step": 9550 }, { "epoch": 0.09, "grad_norm": 9.516436576843262, "learning_rate": 4.7835e-06, "loss": 0.229, "step": 9575 }, { "epoch": 0.09, "grad_norm": 15.7752685546875, "learning_rate": 4.796e-06, "loss": 0.5375, "step": 9600 }, { "epoch": 0.09, "grad_norm": 12.756731033325195, "learning_rate": 4.808500000000001e-06, "loss": 0.2737, "step": 9625 }, { "epoch": 0.09, "grad_norm": 19.367582321166992, "learning_rate": 4.821e-06, "loss": 0.5351, "step": 9650 }, { "epoch": 0.1, "grad_norm": 17.34848976135254, "learning_rate": 4.8335e-06, "loss": 0.2389, "step": 9675 }, { "epoch": 0.1, "grad_norm": 15.988851547241211, "learning_rate": 4.846e-06, "loss": 0.4897, "step": 9700 }, { "epoch": 0.1, "grad_norm": 12.145166397094727, "learning_rate": 4.8585e-06, "loss": 0.2797, "step": 9725 }, { "epoch": 0.1, "grad_norm": 19.768024444580078, "learning_rate": 4.8710000000000005e-06, "loss": 0.5447, "step": 9750 }, { "epoch": 0.1, "grad_norm": 7.791076183319092, "learning_rate": 4.883500000000001e-06, "loss": 0.2826, "step": 9775 }, { "epoch": 0.1, "grad_norm": 23.744543075561523, "learning_rate": 4.896e-06, "loss": 0.5084, "step": 9800 }, { "epoch": 0.1, "grad_norm": 11.099268913269043, "learning_rate": 4.9085000000000005e-06, "loss": 0.3077, "step": 9825 }, { "epoch": 0.1, "grad_norm": 21.140033721923828, "learning_rate": 4.921e-06, "loss": 0.5492, "step": 9850 }, { "epoch": 0.1, "grad_norm": 13.118725776672363, "learning_rate": 4.9335e-06, "loss": 0.2488, "step": 9875 }, { "epoch": 0.1, "grad_norm": 20.41465187072754, "learning_rate": 4.946000000000001e-06, "loss": 0.5449, "step": 9900 }, { "epoch": 0.1, "grad_norm": 8.420760154724121, "learning_rate": 4.958500000000001e-06, "loss": 0.2805, "step": 9925 }, { "epoch": 0.1, "grad_norm": 24.45344352722168, "learning_rate": 4.971e-06, "loss": 0.5413, "step": 9950 }, { "epoch": 0.1, "grad_norm": 13.209565162658691, "learning_rate": 4.983500000000001e-06, "loss": 0.3104, "step": 9975 }, { "epoch": 0.1, "grad_norm": 28.849084854125977, "learning_rate": 4.996e-06, "loss": 0.5204, "step": 10000 }, { "epoch": 0.1, "grad_norm": 15.891327857971191, "learning_rate": 4.9997068965517244e-06, "loss": 0.2455, "step": 10025 }, { "epoch": 0.1, "grad_norm": 20.44768714904785, "learning_rate": 4.999275862068966e-06, "loss": 0.5231, "step": 10050 }, { "epoch": 0.1, "grad_norm": 10.863666534423828, "learning_rate": 4.998844827586207e-06, "loss": 0.2874, "step": 10075 }, { "epoch": 0.1, "grad_norm": 23.030961990356445, "learning_rate": 4.998413793103449e-06, "loss": 0.5116, "step": 10100 }, { "epoch": 0.1, "grad_norm": 15.583869934082031, "learning_rate": 4.99798275862069e-06, "loss": 0.2608, "step": 10125 }, { "epoch": 0.1, "grad_norm": 24.405956268310547, "learning_rate": 4.997551724137931e-06, "loss": 0.5271, "step": 10150 }, { "epoch": 0.1, "grad_norm": 10.761184692382812, "learning_rate": 4.997120689655172e-06, "loss": 0.262, "step": 10175 }, { "epoch": 0.1, "grad_norm": 24.432065963745117, "learning_rate": 4.996689655172415e-06, "loss": 0.4652, "step": 10200 }, { "epoch": 0.1, "grad_norm": 11.49792766571045, "learning_rate": 4.996258620689656e-06, "loss": 0.2894, "step": 10225 }, { "epoch": 0.1, "grad_norm": 19.564666748046875, "learning_rate": 4.9958275862068975e-06, "loss": 0.5876, "step": 10250 }, { "epoch": 0.1, "grad_norm": 14.309235572814941, "learning_rate": 4.995396551724138e-06, "loss": 0.246, "step": 10275 }, { "epoch": 0.1, "grad_norm": 20.50017547607422, "learning_rate": 4.99496551724138e-06, "loss": 0.5546, "step": 10300 }, { "epoch": 0.1, "grad_norm": 12.31285285949707, "learning_rate": 4.994534482758621e-06, "loss": 0.2964, "step": 10325 }, { "epoch": 0.1, "grad_norm": 22.056838989257812, "learning_rate": 4.9941034482758625e-06, "loss": 0.6069, "step": 10350 }, { "epoch": 0.1, "grad_norm": 10.586868286132812, "learning_rate": 4.993672413793104e-06, "loss": 0.2354, "step": 10375 }, { "epoch": 0.1, "grad_norm": 21.403858184814453, "learning_rate": 4.9932413793103454e-06, "loss": 0.4749, "step": 10400 }, { "epoch": 0.1, "grad_norm": 14.441191673278809, "learning_rate": 4.992810344827587e-06, "loss": 0.2773, "step": 10425 }, { "epoch": 0.1, "grad_norm": 28.595489501953125, "learning_rate": 4.992379310344828e-06, "loss": 0.5719, "step": 10450 }, { "epoch": 0.1, "grad_norm": 12.980815887451172, "learning_rate": 4.99194827586207e-06, "loss": 0.285, "step": 10475 }, { "epoch": 0.1, "grad_norm": 18.74728775024414, "learning_rate": 4.99151724137931e-06, "loss": 0.4739, "step": 10500 }, { "epoch": 0.1, "grad_norm": 9.723237037658691, "learning_rate": 4.991086206896552e-06, "loss": 0.2853, "step": 10525 }, { "epoch": 0.1, "grad_norm": 17.475486755371094, "learning_rate": 4.990655172413793e-06, "loss": 0.5207, "step": 10550 }, { "epoch": 0.1, "grad_norm": 22.172344207763672, "learning_rate": 4.990224137931035e-06, "loss": 0.2951, "step": 10575 }, { "epoch": 0.1, "grad_norm": 18.715251922607422, "learning_rate": 4.989793103448276e-06, "loss": 0.4841, "step": 10600 }, { "epoch": 0.1, "grad_norm": 9.711993217468262, "learning_rate": 4.989362068965518e-06, "loss": 0.2636, "step": 10625 }, { "epoch": 0.1, "grad_norm": 27.51717758178711, "learning_rate": 4.988931034482759e-06, "loss": 0.494, "step": 10650 }, { "epoch": 0.1, "grad_norm": 10.353540420532227, "learning_rate": 4.988500000000001e-06, "loss": 0.2625, "step": 10675 }, { "epoch": 0.11, "grad_norm": 29.268474578857422, "learning_rate": 4.988068965517241e-06, "loss": 0.5564, "step": 10700 }, { "epoch": 0.11, "grad_norm": 14.897806167602539, "learning_rate": 4.987637931034483e-06, "loss": 0.3069, "step": 10725 }, { "epoch": 0.11, "grad_norm": 21.880765914916992, "learning_rate": 4.987206896551724e-06, "loss": 0.4933, "step": 10750 }, { "epoch": 0.11, "grad_norm": 14.001994132995605, "learning_rate": 4.9867758620689664e-06, "loss": 0.2651, "step": 10775 }, { "epoch": 0.11, "grad_norm": 25.434017181396484, "learning_rate": 4.986344827586208e-06, "loss": 0.5145, "step": 10800 }, { "epoch": 0.11, "grad_norm": 9.929616928100586, "learning_rate": 4.9859137931034485e-06, "loss": 0.2641, "step": 10825 }, { "epoch": 0.11, "grad_norm": 20.490352630615234, "learning_rate": 4.98548275862069e-06, "loss": 0.5009, "step": 10850 }, { "epoch": 0.11, "grad_norm": 12.5620756149292, "learning_rate": 4.985051724137931e-06, "loss": 0.2824, "step": 10875 }, { "epoch": 0.11, "grad_norm": 28.161970138549805, "learning_rate": 4.984620689655173e-06, "loss": 0.4447, "step": 10900 }, { "epoch": 0.11, "grad_norm": 8.578015327453613, "learning_rate": 4.984189655172414e-06, "loss": 0.2536, "step": 10925 }, { "epoch": 0.11, "grad_norm": 21.80605697631836, "learning_rate": 4.983758620689656e-06, "loss": 0.5424, "step": 10950 }, { "epoch": 0.11, "grad_norm": 10.824913024902344, "learning_rate": 4.983327586206897e-06, "loss": 0.2694, "step": 10975 }, { "epoch": 0.11, "grad_norm": 22.84677505493164, "learning_rate": 4.982896551724139e-06, "loss": 0.5142, "step": 11000 }, { "epoch": 0.11, "grad_norm": 8.476611137390137, "learning_rate": 4.98246551724138e-06, "loss": 0.2676, "step": 11025 }, { "epoch": 0.11, "grad_norm": 30.278989791870117, "learning_rate": 4.982034482758621e-06, "loss": 0.4705, "step": 11050 }, { "epoch": 0.11, "grad_norm": 16.036714553833008, "learning_rate": 4.981603448275862e-06, "loss": 0.2904, "step": 11075 }, { "epoch": 0.11, "grad_norm": 26.722576141357422, "learning_rate": 4.981172413793104e-06, "loss": 0.4803, "step": 11100 }, { "epoch": 0.11, "grad_norm": 14.547075271606445, "learning_rate": 4.980741379310345e-06, "loss": 0.274, "step": 11125 }, { "epoch": 0.11, "grad_norm": 17.87710189819336, "learning_rate": 4.980310344827587e-06, "loss": 0.4589, "step": 11150 }, { "epoch": 0.11, "grad_norm": 19.417402267456055, "learning_rate": 4.979879310344828e-06, "loss": 0.2625, "step": 11175 }, { "epoch": 0.11, "grad_norm": 19.350868225097656, "learning_rate": 4.9794482758620695e-06, "loss": 0.5586, "step": 11200 }, { "epoch": 0.11, "grad_norm": 15.509382247924805, "learning_rate": 4.979017241379311e-06, "loss": 0.2806, "step": 11225 }, { "epoch": 0.11, "grad_norm": 19.99104118347168, "learning_rate": 4.978586206896552e-06, "loss": 0.5068, "step": 11250 }, { "epoch": 0.11, "grad_norm": 9.65737533569336, "learning_rate": 4.978155172413793e-06, "loss": 0.2887, "step": 11275 }, { "epoch": 0.11, "grad_norm": 27.635833740234375, "learning_rate": 4.9777241379310345e-06, "loss": 0.6226, "step": 11300 }, { "epoch": 0.11, "grad_norm": 10.346014022827148, "learning_rate": 4.977293103448276e-06, "loss": 0.2581, "step": 11325 }, { "epoch": 0.11, "grad_norm": 18.3278865814209, "learning_rate": 4.976879310344828e-06, "loss": 0.5269, "step": 11350 }, { "epoch": 0.11, "grad_norm": 7.7030205726623535, "learning_rate": 4.976448275862069e-06, "loss": 0.3005, "step": 11375 }, { "epoch": 0.11, "grad_norm": 13.335371971130371, "learning_rate": 4.97601724137931e-06, "loss": 0.5188, "step": 11400 }, { "epoch": 0.11, "grad_norm": 9.136314392089844, "learning_rate": 4.975586206896552e-06, "loss": 0.2821, "step": 11425 }, { "epoch": 0.11, "grad_norm": 21.608285903930664, "learning_rate": 4.975155172413794e-06, "loss": 0.5689, "step": 11450 }, { "epoch": 0.11, "grad_norm": 13.004762649536133, "learning_rate": 4.9747241379310355e-06, "loss": 0.3081, "step": 11475 }, { "epoch": 0.11, "grad_norm": 15.334341049194336, "learning_rate": 4.974293103448276e-06, "loss": 0.5211, "step": 11500 }, { "epoch": 0.11, "grad_norm": 20.629728317260742, "learning_rate": 4.973862068965518e-06, "loss": 0.2549, "step": 11525 }, { "epoch": 0.11, "grad_norm": 15.838205337524414, "learning_rate": 4.973431034482759e-06, "loss": 0.5178, "step": 11550 }, { "epoch": 0.11, "grad_norm": 11.46971321105957, "learning_rate": 4.9730000000000005e-06, "loss": 0.2457, "step": 11575 }, { "epoch": 0.11, "grad_norm": 34.8980712890625, "learning_rate": 4.972568965517242e-06, "loss": 0.5679, "step": 11600 }, { "epoch": 0.11, "grad_norm": 9.514849662780762, "learning_rate": 4.972137931034483e-06, "loss": 0.2825, "step": 11625 }, { "epoch": 0.11, "grad_norm": 25.165727615356445, "learning_rate": 4.971706896551725e-06, "loss": 0.5494, "step": 11650 }, { "epoch": 0.11, "grad_norm": 16.84276580810547, "learning_rate": 4.971275862068966e-06, "loss": 0.2849, "step": 11675 }, { "epoch": 0.12, "grad_norm": 21.287202835083008, "learning_rate": 4.970844827586208e-06, "loss": 0.5762, "step": 11700 }, { "epoch": 0.12, "grad_norm": 11.807638168334961, "learning_rate": 4.970413793103448e-06, "loss": 0.2868, "step": 11725 }, { "epoch": 0.12, "grad_norm": 19.22753143310547, "learning_rate": 4.96998275862069e-06, "loss": 0.4633, "step": 11750 }, { "epoch": 0.12, "grad_norm": 12.132896423339844, "learning_rate": 4.969551724137931e-06, "loss": 0.259, "step": 11775 }, { "epoch": 0.12, "grad_norm": 18.195262908935547, "learning_rate": 4.969120689655173e-06, "loss": 0.5333, "step": 11800 }, { "epoch": 0.12, "grad_norm": 8.915534019470215, "learning_rate": 4.968689655172414e-06, "loss": 0.2972, "step": 11825 }, { "epoch": 0.12, "grad_norm": 27.589218139648438, "learning_rate": 4.968258620689656e-06, "loss": 0.5136, "step": 11850 }, { "epoch": 0.12, "grad_norm": 8.493362426757812, "learning_rate": 4.967827586206897e-06, "loss": 0.2975, "step": 11875 }, { "epoch": 0.12, "grad_norm": 22.95363426208496, "learning_rate": 4.967396551724139e-06, "loss": 0.5271, "step": 11900 }, { "epoch": 0.12, "grad_norm": 10.436800003051758, "learning_rate": 4.96696551724138e-06, "loss": 0.2379, "step": 11925 }, { "epoch": 0.12, "grad_norm": 19.457244873046875, "learning_rate": 4.966534482758621e-06, "loss": 0.5949, "step": 11950 }, { "epoch": 0.12, "grad_norm": 12.233224868774414, "learning_rate": 4.966103448275862e-06, "loss": 0.2874, "step": 11975 }, { "epoch": 0.12, "grad_norm": 15.971617698669434, "learning_rate": 4.9656724137931036e-06, "loss": 0.5059, "step": 12000 }, { "epoch": 0.12, "grad_norm": 12.681455612182617, "learning_rate": 4.965241379310346e-06, "loss": 0.2825, "step": 12025 }, { "epoch": 0.12, "grad_norm": 10.246992111206055, "learning_rate": 4.9648103448275865e-06, "loss": 0.5214, "step": 12050 }, { "epoch": 0.12, "grad_norm": 13.953018188476562, "learning_rate": 4.964379310344828e-06, "loss": 0.239, "step": 12075 }, { "epoch": 0.12, "grad_norm": 25.995325088500977, "learning_rate": 4.963948275862069e-06, "loss": 0.5549, "step": 12100 }, { "epoch": 0.12, "grad_norm": 10.684301376342773, "learning_rate": 4.963517241379311e-06, "loss": 0.3107, "step": 12125 }, { "epoch": 0.12, "grad_norm": 21.4627628326416, "learning_rate": 4.963086206896552e-06, "loss": 0.541, "step": 12150 }, { "epoch": 0.12, "grad_norm": 10.811058044433594, "learning_rate": 4.962655172413794e-06, "loss": 0.2458, "step": 12175 }, { "epoch": 0.12, "grad_norm": 23.79206085205078, "learning_rate": 4.962224137931035e-06, "loss": 0.528, "step": 12200 }, { "epoch": 0.12, "grad_norm": 15.849831581115723, "learning_rate": 4.961793103448277e-06, "loss": 0.2512, "step": 12225 }, { "epoch": 0.12, "grad_norm": 21.987567901611328, "learning_rate": 4.961362068965518e-06, "loss": 0.5186, "step": 12250 }, { "epoch": 0.12, "grad_norm": 19.294559478759766, "learning_rate": 4.960931034482759e-06, "loss": 0.306, "step": 12275 }, { "epoch": 0.12, "grad_norm": 18.618684768676758, "learning_rate": 4.9605e-06, "loss": 0.5741, "step": 12300 }, { "epoch": 0.12, "grad_norm": 17.722763061523438, "learning_rate": 4.960068965517242e-06, "loss": 0.284, "step": 12325 }, { "epoch": 0.12, "grad_norm": 20.22948455810547, "learning_rate": 4.959637931034483e-06, "loss": 0.4682, "step": 12350 }, { "epoch": 0.12, "grad_norm": 11.699563980102539, "learning_rate": 4.9592068965517246e-06, "loss": 0.2421, "step": 12375 }, { "epoch": 0.12, "grad_norm": 30.59283447265625, "learning_rate": 4.958775862068966e-06, "loss": 0.4998, "step": 12400 }, { "epoch": 0.12, "grad_norm": 11.684243202209473, "learning_rate": 4.9583448275862075e-06, "loss": 0.254, "step": 12425 }, { "epoch": 0.12, "grad_norm": 21.6456356048584, "learning_rate": 4.957913793103449e-06, "loss": 0.5247, "step": 12450 }, { "epoch": 0.12, "grad_norm": 13.288766860961914, "learning_rate": 4.95748275862069e-06, "loss": 0.268, "step": 12475 }, { "epoch": 0.12, "grad_norm": 16.96599578857422, "learning_rate": 4.957051724137931e-06, "loss": 0.5388, "step": 12500 }, { "epoch": 0.12, "grad_norm": 17.540864944458008, "learning_rate": 4.9566206896551725e-06, "loss": 0.2394, "step": 12525 }, { "epoch": 0.12, "grad_norm": 22.539445877075195, "learning_rate": 4.956189655172414e-06, "loss": 0.5463, "step": 12550 }, { "epoch": 0.12, "grad_norm": 10.467499732971191, "learning_rate": 4.955758620689655e-06, "loss": 0.2302, "step": 12575 }, { "epoch": 0.12, "grad_norm": 21.483903884887695, "learning_rate": 4.955327586206897e-06, "loss": 0.5103, "step": 12600 }, { "epoch": 0.12, "grad_norm": 10.595412254333496, "learning_rate": 4.954896551724138e-06, "loss": 0.2681, "step": 12625 }, { "epoch": 0.12, "grad_norm": 29.31808853149414, "learning_rate": 4.95446551724138e-06, "loss": 0.5115, "step": 12650 }, { "epoch": 0.12, "grad_norm": 6.48312520980835, "learning_rate": 4.954034482758621e-06, "loss": 0.303, "step": 12675 }, { "epoch": 0.12, "grad_norm": 35.123802185058594, "learning_rate": 4.953603448275863e-06, "loss": 0.529, "step": 12700 }, { "epoch": 0.13, "grad_norm": 29.856128692626953, "learning_rate": 4.953172413793103e-06, "loss": 0.2188, "step": 12725 }, { "epoch": 0.13, "grad_norm": 18.753149032592773, "learning_rate": 4.9527413793103456e-06, "loss": 0.6342, "step": 12750 }, { "epoch": 0.13, "grad_norm": 5.35963249206543, "learning_rate": 4.952310344827587e-06, "loss": 0.2221, "step": 12775 }, { "epoch": 0.13, "grad_norm": 22.648324966430664, "learning_rate": 4.9518793103448285e-06, "loss": 0.4422, "step": 12800 }, { "epoch": 0.13, "grad_norm": 8.620927810668945, "learning_rate": 4.951448275862069e-06, "loss": 0.2725, "step": 12825 }, { "epoch": 0.13, "grad_norm": 17.309906005859375, "learning_rate": 4.9510172413793105e-06, "loss": 0.5349, "step": 12850 }, { "epoch": 0.13, "grad_norm": 13.908350944519043, "learning_rate": 4.950586206896552e-06, "loss": 0.2469, "step": 12875 }, { "epoch": 0.13, "grad_norm": 21.295846939086914, "learning_rate": 4.9501551724137935e-06, "loss": 0.4613, "step": 12900 }, { "epoch": 0.13, "grad_norm": 19.17820930480957, "learning_rate": 4.949724137931035e-06, "loss": 0.3042, "step": 12925 }, { "epoch": 0.13, "grad_norm": 22.640756607055664, "learning_rate": 4.949293103448276e-06, "loss": 0.4534, "step": 12950 }, { "epoch": 0.13, "grad_norm": 11.067428588867188, "learning_rate": 4.948862068965518e-06, "loss": 0.2876, "step": 12975 }, { "epoch": 0.13, "grad_norm": 33.91845703125, "learning_rate": 4.948431034482759e-06, "loss": 0.4567, "step": 13000 }, { "epoch": 0.13, "grad_norm": 8.577398300170898, "learning_rate": 4.948000000000001e-06, "loss": 0.249, "step": 13025 }, { "epoch": 0.13, "grad_norm": 19.673763275146484, "learning_rate": 4.947568965517241e-06, "loss": 0.4809, "step": 13050 }, { "epoch": 0.13, "grad_norm": 9.459444046020508, "learning_rate": 4.947137931034483e-06, "loss": 0.255, "step": 13075 }, { "epoch": 0.13, "grad_norm": 22.70823860168457, "learning_rate": 4.946706896551724e-06, "loss": 0.5146, "step": 13100 }, { "epoch": 0.13, "grad_norm": 13.672142028808594, "learning_rate": 4.946275862068966e-06, "loss": 0.2985, "step": 13125 }, { "epoch": 0.13, "grad_norm": 22.14682388305664, "learning_rate": 4.945844827586207e-06, "loss": 0.4633, "step": 13150 }, { "epoch": 0.13, "grad_norm": 11.927120208740234, "learning_rate": 4.945413793103449e-06, "loss": 0.3073, "step": 13175 }, { "epoch": 0.13, "grad_norm": 19.73905372619629, "learning_rate": 4.94498275862069e-06, "loss": 0.4908, "step": 13200 }, { "epoch": 0.13, "grad_norm": 14.23742961883545, "learning_rate": 4.9445517241379315e-06, "loss": 0.2506, "step": 13225 }, { "epoch": 0.13, "grad_norm": 21.92215347290039, "learning_rate": 4.944120689655173e-06, "loss": 0.5514, "step": 13250 }, { "epoch": 0.13, "grad_norm": 13.897258758544922, "learning_rate": 4.943689655172414e-06, "loss": 0.2969, "step": 13275 }, { "epoch": 0.13, "grad_norm": 16.803466796875, "learning_rate": 4.943258620689655e-06, "loss": 0.4721, "step": 13300 }, { "epoch": 0.13, "grad_norm": 11.795923233032227, "learning_rate": 4.942827586206897e-06, "loss": 0.2184, "step": 13325 }, { "epoch": 0.13, "grad_norm": 15.188496589660645, "learning_rate": 4.942396551724139e-06, "loss": 0.4246, "step": 13350 }, { "epoch": 0.13, "grad_norm": 12.313096046447754, "learning_rate": 4.94196551724138e-06, "loss": 0.2589, "step": 13375 }, { "epoch": 0.13, "grad_norm": 30.13702964782715, "learning_rate": 4.941534482758621e-06, "loss": 0.4988, "step": 13400 }, { "epoch": 0.13, "grad_norm": 20.79243278503418, "learning_rate": 4.941103448275862e-06, "loss": 0.29, "step": 13425 }, { "epoch": 0.13, "grad_norm": 27.802026748657227, "learning_rate": 4.940672413793104e-06, "loss": 0.5427, "step": 13450 }, { "epoch": 0.13, "grad_norm": 12.999860763549805, "learning_rate": 4.940241379310345e-06, "loss": 0.2614, "step": 13475 }, { "epoch": 0.13, "grad_norm": 18.832286834716797, "learning_rate": 4.939810344827587e-06, "loss": 0.4516, "step": 13500 }, { "epoch": 0.13, "grad_norm": 10.827120780944824, "learning_rate": 4.939379310344828e-06, "loss": 0.2581, "step": 13525 }, { "epoch": 0.13, "grad_norm": 28.639333724975586, "learning_rate": 4.93894827586207e-06, "loss": 0.4977, "step": 13550 }, { "epoch": 0.13, "grad_norm": 16.09276580810547, "learning_rate": 4.938517241379311e-06, "loss": 0.3093, "step": 13575 }, { "epoch": 0.13, "grad_norm": 15.508621215820312, "learning_rate": 4.9380862068965525e-06, "loss": 0.5463, "step": 13600 }, { "epoch": 0.13, "grad_norm": 7.604380130767822, "learning_rate": 4.937655172413793e-06, "loss": 0.2406, "step": 13625 }, { "epoch": 0.13, "grad_norm": 25.853809356689453, "learning_rate": 4.937224137931035e-06, "loss": 0.5585, "step": 13650 }, { "epoch": 0.13, "grad_norm": 11.331945419311523, "learning_rate": 4.936793103448276e-06, "loss": 0.2569, "step": 13675 }, { "epoch": 0.13, "grad_norm": 21.238683700561523, "learning_rate": 4.9363620689655175e-06, "loss": 0.5658, "step": 13700 }, { "epoch": 0.13, "grad_norm": 13.613617897033691, "learning_rate": 4.935931034482759e-06, "loss": 0.2749, "step": 13725 }, { "epoch": 0.14, "grad_norm": 18.522884368896484, "learning_rate": 4.9355000000000004e-06, "loss": 0.4348, "step": 13750 }, { "epoch": 0.14, "grad_norm": 6.621995449066162, "learning_rate": 4.935068965517242e-06, "loss": 0.2463, "step": 13775 }, { "epoch": 0.14, "grad_norm": 23.027116775512695, "learning_rate": 4.934637931034483e-06, "loss": 0.5331, "step": 13800 }, { "epoch": 0.14, "grad_norm": 7.884406566619873, "learning_rate": 4.934206896551725e-06, "loss": 0.2606, "step": 13825 }, { "epoch": 0.14, "grad_norm": 19.224834442138672, "learning_rate": 4.933775862068965e-06, "loss": 0.4301, "step": 13850 }, { "epoch": 0.14, "grad_norm": 13.71308708190918, "learning_rate": 4.933344827586207e-06, "loss": 0.3039, "step": 13875 }, { "epoch": 0.14, "grad_norm": 26.538970947265625, "learning_rate": 4.932913793103449e-06, "loss": 0.5558, "step": 13900 }, { "epoch": 0.14, "grad_norm": 10.72696304321289, "learning_rate": 4.932482758620691e-06, "loss": 0.2802, "step": 13925 }, { "epoch": 0.14, "grad_norm": 18.430330276489258, "learning_rate": 4.932051724137931e-06, "loss": 0.5097, "step": 13950 }, { "epoch": 0.14, "grad_norm": 10.094527244567871, "learning_rate": 4.931620689655173e-06, "loss": 0.2735, "step": 13975 }, { "epoch": 0.14, "grad_norm": 28.927541732788086, "learning_rate": 4.931189655172414e-06, "loss": 0.5394, "step": 14000 }, { "epoch": 0.14, "grad_norm": 10.523635864257812, "learning_rate": 4.930758620689656e-06, "loss": 0.2125, "step": 14025 }, { "epoch": 0.14, "grad_norm": 19.7420711517334, "learning_rate": 4.930327586206897e-06, "loss": 0.4634, "step": 14050 }, { "epoch": 0.14, "grad_norm": 18.913381576538086, "learning_rate": 4.9298965517241385e-06, "loss": 0.2734, "step": 14075 }, { "epoch": 0.14, "grad_norm": 23.644878387451172, "learning_rate": 4.92946551724138e-06, "loss": 0.4736, "step": 14100 }, { "epoch": 0.14, "grad_norm": 7.111321449279785, "learning_rate": 4.9290344827586214e-06, "loss": 0.2811, "step": 14125 }, { "epoch": 0.14, "grad_norm": 12.24442195892334, "learning_rate": 4.928603448275863e-06, "loss": 0.4492, "step": 14150 }, { "epoch": 0.14, "grad_norm": 10.01503849029541, "learning_rate": 4.9281724137931035e-06, "loss": 0.2732, "step": 14175 }, { "epoch": 0.14, "grad_norm": 19.93366813659668, "learning_rate": 4.927741379310345e-06, "loss": 0.5151, "step": 14200 }, { "epoch": 0.14, "grad_norm": 9.66781997680664, "learning_rate": 4.927310344827586e-06, "loss": 0.2998, "step": 14225 }, { "epoch": 0.14, "grad_norm": Infinity, "learning_rate": 4.926896551724139e-06, "loss": 0.5104, "step": 14250 }, { "epoch": 0.14, "grad_norm": 9.611762046813965, "learning_rate": 4.926465517241379e-06, "loss": 0.2468, "step": 14275 }, { "epoch": 0.14, "grad_norm": 17.789730072021484, "learning_rate": 4.926034482758621e-06, "loss": 0.5315, "step": 14300 }, { "epoch": 0.14, "grad_norm": 9.029945373535156, "learning_rate": 4.925603448275862e-06, "loss": 0.2222, "step": 14325 }, { "epoch": 0.14, "grad_norm": 17.598262786865234, "learning_rate": 4.925172413793104e-06, "loss": 0.4952, "step": 14350 }, { "epoch": 0.14, "grad_norm": 8.655625343322754, "learning_rate": 4.924741379310345e-06, "loss": 0.2399, "step": 14375 }, { "epoch": 0.14, "grad_norm": 24.175472259521484, "learning_rate": 4.924310344827587e-06, "loss": 0.5191, "step": 14400 }, { "epoch": 0.14, "grad_norm": 4.382723808288574, "learning_rate": 4.923879310344828e-06, "loss": 0.2391, "step": 14425 }, { "epoch": 0.14, "grad_norm": 26.78007698059082, "learning_rate": 4.9234482758620695e-06, "loss": 0.4712, "step": 14450 }, { "epoch": 0.14, "grad_norm": 12.143158912658691, "learning_rate": 4.923017241379311e-06, "loss": 0.3114, "step": 14475 }, { "epoch": 0.14, "grad_norm": 21.66142463684082, "learning_rate": 4.922586206896552e-06, "loss": 0.5751, "step": 14500 }, { "epoch": 0.14, "grad_norm": 13.520648002624512, "learning_rate": 4.922155172413793e-06, "loss": 0.2237, "step": 14525 }, { "epoch": 0.14, "grad_norm": 16.955215454101562, "learning_rate": 4.9217241379310345e-06, "loss": 0.4529, "step": 14550 }, { "epoch": 0.14, "grad_norm": 7.989325523376465, "learning_rate": 4.921293103448276e-06, "loss": 0.2553, "step": 14575 }, { "epoch": 0.14, "grad_norm": 20.798748016357422, "learning_rate": 4.920862068965518e-06, "loss": 0.4647, "step": 14600 }, { "epoch": 0.14, "grad_norm": 16.33477783203125, "learning_rate": 4.920431034482759e-06, "loss": 0.2525, "step": 14625 }, { "epoch": 0.14, "grad_norm": 22.542627334594727, "learning_rate": 4.92e-06, "loss": 0.4878, "step": 14650 }, { "epoch": 0.14, "grad_norm": 11.825023651123047, "learning_rate": 4.919568965517242e-06, "loss": 0.2933, "step": 14675 }, { "epoch": 0.14, "grad_norm": 21.34713363647461, "learning_rate": 4.919137931034483e-06, "loss": 0.4477, "step": 14700 }, { "epoch": 0.14, "grad_norm": 8.752665519714355, "learning_rate": 4.918706896551725e-06, "loss": 0.284, "step": 14725 }, { "epoch": 0.15, "grad_norm": 21.024642944335938, "learning_rate": 4.918275862068966e-06, "loss": 0.5059, "step": 14750 }, { "epoch": 0.15, "grad_norm": 12.885470390319824, "learning_rate": 4.917844827586208e-06, "loss": 0.2317, "step": 14775 }, { "epoch": 0.15, "grad_norm": 15.905805587768555, "learning_rate": 4.917413793103449e-06, "loss": 0.5422, "step": 14800 }, { "epoch": 0.15, "grad_norm": 11.211438179016113, "learning_rate": 4.9169827586206905e-06, "loss": 0.2443, "step": 14825 }, { "epoch": 0.15, "grad_norm": 18.60529136657715, "learning_rate": 4.916551724137931e-06, "loss": 0.4352, "step": 14850 }, { "epoch": 0.15, "grad_norm": 11.682415008544922, "learning_rate": 4.916120689655173e-06, "loss": 0.27, "step": 14875 }, { "epoch": 0.15, "grad_norm": 23.571025848388672, "learning_rate": 4.915689655172414e-06, "loss": 0.4722, "step": 14900 }, { "epoch": 0.15, "grad_norm": 13.7390775680542, "learning_rate": 4.9152586206896555e-06, "loss": 0.2673, "step": 14925 }, { "epoch": 0.15, "grad_norm": 19.69394874572754, "learning_rate": 4.914827586206897e-06, "loss": 0.5109, "step": 14950 }, { "epoch": 0.15, "grad_norm": 11.607175827026367, "learning_rate": 4.914396551724138e-06, "loss": 0.2937, "step": 14975 }, { "epoch": 0.15, "grad_norm": 23.771886825561523, "learning_rate": 4.91396551724138e-06, "loss": 0.4789, "step": 15000 }, { "epoch": 0.15, "grad_norm": 20.253019332885742, "learning_rate": 4.913534482758621e-06, "loss": 0.2537, "step": 15025 }, { "epoch": 0.15, "grad_norm": 17.76645278930664, "learning_rate": 4.913103448275863e-06, "loss": 0.4723, "step": 15050 }, { "epoch": 0.15, "grad_norm": 13.502947807312012, "learning_rate": 4.912672413793103e-06, "loss": 0.2899, "step": 15075 }, { "epoch": 0.15, "grad_norm": 24.221101760864258, "learning_rate": 4.912241379310345e-06, "loss": 0.5537, "step": 15100 }, { "epoch": 0.15, "grad_norm": 11.099861145019531, "learning_rate": 4.911810344827586e-06, "loss": 0.2356, "step": 15125 }, { "epoch": 0.15, "grad_norm": 14.826766967773438, "learning_rate": 4.911379310344828e-06, "loss": 0.4992, "step": 15150 }, { "epoch": 0.15, "grad_norm": 20.09890365600586, "learning_rate": 4.910948275862069e-06, "loss": 0.2681, "step": 15175 }, { "epoch": 0.15, "grad_norm": 20.31129264831543, "learning_rate": 4.910517241379311e-06, "loss": 0.5433, "step": 15200 }, { "epoch": 0.15, "grad_norm": 16.645641326904297, "learning_rate": 4.910086206896552e-06, "loss": 0.293, "step": 15225 }, { "epoch": 0.15, "grad_norm": 20.032533645629883, "learning_rate": 4.909655172413794e-06, "loss": 0.5917, "step": 15250 }, { "epoch": 0.15, "grad_norm": 9.997434616088867, "learning_rate": 4.909224137931035e-06, "loss": 0.2343, "step": 15275 }, { "epoch": 0.15, "grad_norm": 13.687545776367188, "learning_rate": 4.9087931034482765e-06, "loss": 0.4525, "step": 15300 }, { "epoch": 0.15, "grad_norm": 10.962838172912598, "learning_rate": 4.908362068965518e-06, "loss": 0.2533, "step": 15325 }, { "epoch": 0.15, "grad_norm": 13.253432273864746, "learning_rate": 4.907931034482759e-06, "loss": 0.5122, "step": 15350 }, { "epoch": 0.15, "grad_norm": 9.44445514678955, "learning_rate": 4.907500000000001e-06, "loss": 0.2186, "step": 15375 }, { "epoch": 0.15, "grad_norm": 17.069414138793945, "learning_rate": 4.9070689655172415e-06, "loss": 0.3915, "step": 15400 }, { "epoch": 0.15, "grad_norm": 21.63082504272461, "learning_rate": 4.906637931034483e-06, "loss": 0.2762, "step": 15425 }, { "epoch": 0.15, "grad_norm": 27.29813003540039, "learning_rate": 4.906206896551724e-06, "loss": 0.5399, "step": 15450 }, { "epoch": 0.15, "grad_norm": 11.224752426147461, "learning_rate": 4.905775862068966e-06, "loss": 0.3226, "step": 15475 }, { "epoch": 0.15, "grad_norm": 23.104719161987305, "learning_rate": 4.905344827586207e-06, "loss": 0.4682, "step": 15500 }, { "epoch": 0.15, "grad_norm": 14.500164985656738, "learning_rate": 4.904913793103449e-06, "loss": 0.2538, "step": 15525 }, { "epoch": 0.15, "grad_norm": 23.350704193115234, "learning_rate": 4.90448275862069e-06, "loss": 0.5061, "step": 15550 }, { "epoch": 0.15, "grad_norm": 16.035615921020508, "learning_rate": 4.904051724137932e-06, "loss": 0.2668, "step": 15575 }, { "epoch": 0.15, "grad_norm": 20.904163360595703, "learning_rate": 4.903620689655173e-06, "loss": 0.4824, "step": 15600 }, { "epoch": 0.15, "grad_norm": 9.874075889587402, "learning_rate": 4.903189655172414e-06, "loss": 0.2498, "step": 15625 }, { "epoch": 0.15, "grad_norm": 12.929283142089844, "learning_rate": 4.902758620689655e-06, "loss": 0.4714, "step": 15650 }, { "epoch": 0.15, "grad_norm": 6.089432239532471, "learning_rate": 4.902327586206897e-06, "loss": 0.2143, "step": 15675 }, { "epoch": 0.15, "grad_norm": 28.87601089477539, "learning_rate": 4.901896551724138e-06, "loss": 0.407, "step": 15700 }, { "epoch": 0.15, "grad_norm": 17.921049118041992, "learning_rate": 4.9014655172413796e-06, "loss": 0.2282, "step": 15725 }, { "epoch": 0.15, "grad_norm": 10.2708101272583, "learning_rate": 4.901034482758621e-06, "loss": 0.454, "step": 15750 }, { "epoch": 0.16, "grad_norm": 11.56557559967041, "learning_rate": 4.9006034482758625e-06, "loss": 0.2446, "step": 15775 }, { "epoch": 0.16, "grad_norm": 18.421323776245117, "learning_rate": 4.900172413793104e-06, "loss": 0.5049, "step": 15800 }, { "epoch": 0.16, "grad_norm": 6.543795585632324, "learning_rate": 4.899741379310345e-06, "loss": 0.2649, "step": 15825 }, { "epoch": 0.16, "grad_norm": 21.604896545410156, "learning_rate": 4.899310344827586e-06, "loss": 0.5224, "step": 15850 }, { "epoch": 0.16, "grad_norm": 12.646187782287598, "learning_rate": 4.898879310344828e-06, "loss": 0.233, "step": 15875 }, { "epoch": 0.16, "grad_norm": 23.608230590820312, "learning_rate": 4.89844827586207e-06, "loss": 0.5836, "step": 15900 }, { "epoch": 0.16, "grad_norm": 10.624305725097656, "learning_rate": 4.898017241379311e-06, "loss": 0.2469, "step": 15925 }, { "epoch": 0.16, "grad_norm": 16.034570693969727, "learning_rate": 4.897586206896553e-06, "loss": 0.5205, "step": 15950 }, { "epoch": 0.16, "grad_norm": 10.94775676727295, "learning_rate": 4.897155172413793e-06, "loss": 0.2991, "step": 15975 }, { "epoch": 0.16, "grad_norm": 18.459211349487305, "learning_rate": 4.896724137931035e-06, "loss": 0.4215, "step": 16000 }, { "epoch": 0.16, "grad_norm": 17.74468994140625, "learning_rate": 4.896293103448276e-06, "loss": 0.2825, "step": 16025 }, { "epoch": 0.16, "grad_norm": 23.39765739440918, "learning_rate": 4.895862068965518e-06, "loss": 0.4895, "step": 16050 }, { "epoch": 0.16, "grad_norm": 11.388063430786133, "learning_rate": 4.895431034482759e-06, "loss": 0.2624, "step": 16075 }, { "epoch": 0.16, "grad_norm": 17.853580474853516, "learning_rate": 4.8950000000000006e-06, "loss": 0.5399, "step": 16100 }, { "epoch": 0.16, "grad_norm": 8.656503677368164, "learning_rate": 4.894568965517242e-06, "loss": 0.2819, "step": 16125 }, { "epoch": 0.16, "grad_norm": 24.11385726928711, "learning_rate": 4.8941379310344835e-06, "loss": 0.4749, "step": 16150 }, { "epoch": 0.16, "grad_norm": 8.419092178344727, "learning_rate": 4.893706896551724e-06, "loss": 0.2658, "step": 16175 }, { "epoch": 0.16, "grad_norm": 22.98084259033203, "learning_rate": 4.8932758620689655e-06, "loss": 0.4514, "step": 16200 }, { "epoch": 0.16, "grad_norm": 10.355072021484375, "learning_rate": 4.892844827586207e-06, "loss": 0.2545, "step": 16225 }, { "epoch": 0.16, "grad_norm": 20.043973922729492, "learning_rate": 4.8924137931034485e-06, "loss": 0.4538, "step": 16250 }, { "epoch": 0.16, "grad_norm": 12.010132789611816, "learning_rate": 4.89198275862069e-06, "loss": 0.2605, "step": 16275 }, { "epoch": 0.16, "grad_norm": 26.45646095275879, "learning_rate": 4.891551724137931e-06, "loss": 0.5143, "step": 16300 }, { "epoch": 0.16, "grad_norm": 12.663009643554688, "learning_rate": 4.891120689655173e-06, "loss": 0.3208, "step": 16325 }, { "epoch": 0.16, "grad_norm": 19.252849578857422, "learning_rate": 4.890689655172414e-06, "loss": 0.576, "step": 16350 }, { "epoch": 0.16, "grad_norm": 15.945219993591309, "learning_rate": 4.890258620689656e-06, "loss": 0.2394, "step": 16375 }, { "epoch": 0.16, "grad_norm": 26.599224090576172, "learning_rate": 4.889827586206896e-06, "loss": 0.4809, "step": 16400 }, { "epoch": 0.16, "grad_norm": 11.268331527709961, "learning_rate": 4.889396551724138e-06, "loss": 0.2202, "step": 16425 }, { "epoch": 0.16, "grad_norm": 20.874784469604492, "learning_rate": 4.88896551724138e-06, "loss": 0.4734, "step": 16450 }, { "epoch": 0.16, "grad_norm": 11.193197250366211, "learning_rate": 4.8885344827586216e-06, "loss": 0.2626, "step": 16475 }, { "epoch": 0.16, "grad_norm": 15.316932678222656, "learning_rate": 4.888103448275863e-06, "loss": 0.5137, "step": 16500 }, { "epoch": 0.16, "grad_norm": 10.78478717803955, "learning_rate": 4.887672413793104e-06, "loss": 0.2702, "step": 16525 }, { "epoch": 0.16, "grad_norm": 19.258346557617188, "learning_rate": 4.887241379310345e-06, "loss": 0.4634, "step": 16550 }, { "epoch": 0.16, "grad_norm": 6.913346290588379, "learning_rate": 4.8868103448275865e-06, "loss": 0.2379, "step": 16575 }, { "epoch": 0.16, "grad_norm": 22.695899963378906, "learning_rate": 4.886379310344828e-06, "loss": 0.5589, "step": 16600 }, { "epoch": 0.16, "grad_norm": 13.313573837280273, "learning_rate": 4.8859482758620695e-06, "loss": 0.2356, "step": 16625 }, { "epoch": 0.16, "grad_norm": 22.12359046936035, "learning_rate": 4.885517241379311e-06, "loss": 0.4492, "step": 16650 }, { "epoch": 0.16, "grad_norm": 16.84253692626953, "learning_rate": 4.885086206896552e-06, "loss": 0.2779, "step": 16675 }, { "epoch": 0.16, "grad_norm": 21.07940101623535, "learning_rate": 4.884655172413794e-06, "loss": 0.5788, "step": 16700 }, { "epoch": 0.16, "grad_norm": 12.163787841796875, "learning_rate": 4.884224137931035e-06, "loss": 0.2536, "step": 16725 }, { "epoch": 0.16, "grad_norm": 22.4648494720459, "learning_rate": 4.883793103448276e-06, "loss": 0.4854, "step": 16750 }, { "epoch": 0.16, "grad_norm": 12.922937393188477, "learning_rate": 4.883362068965517e-06, "loss": 0.2912, "step": 16775 }, { "epoch": 0.17, "grad_norm": 15.155232429504395, "learning_rate": 4.882931034482759e-06, "loss": 0.5101, "step": 16800 }, { "epoch": 0.17, "grad_norm": 18.803333282470703, "learning_rate": 4.8825e-06, "loss": 0.2817, "step": 16825 }, { "epoch": 0.17, "grad_norm": 18.047279357910156, "learning_rate": 4.882068965517242e-06, "loss": 0.4346, "step": 16850 }, { "epoch": 0.17, "grad_norm": 14.092240333557129, "learning_rate": 4.881637931034483e-06, "loss": 0.2774, "step": 16875 }, { "epoch": 0.17, "grad_norm": 12.91687297821045, "learning_rate": 4.881206896551725e-06, "loss": 0.5228, "step": 16900 }, { "epoch": 0.17, "grad_norm": 10.75521183013916, "learning_rate": 4.880775862068966e-06, "loss": 0.2442, "step": 16925 }, { "epoch": 0.17, "grad_norm": 10.173538208007812, "learning_rate": 4.8803448275862075e-06, "loss": 0.5162, "step": 16950 }, { "epoch": 0.17, "grad_norm": 13.289297103881836, "learning_rate": 4.879913793103448e-06, "loss": 0.2922, "step": 16975 }, { "epoch": 0.17, "grad_norm": 23.464231491088867, "learning_rate": 4.87948275862069e-06, "loss": 0.4874, "step": 17000 }, { "epoch": 0.17, "grad_norm": 11.453228950500488, "learning_rate": 4.879051724137931e-06, "loss": 0.2219, "step": 17025 }, { "epoch": 0.17, "grad_norm": 25.9354248046875, "learning_rate": 4.878620689655173e-06, "loss": 0.44, "step": 17050 }, { "epoch": 0.17, "grad_norm": 12.81757926940918, "learning_rate": 4.878189655172414e-06, "loss": 0.3035, "step": 17075 }, { "epoch": 0.17, "grad_norm": 22.228715896606445, "learning_rate": 4.8777586206896554e-06, "loss": 0.5848, "step": 17100 }, { "epoch": 0.17, "grad_norm": 12.737390518188477, "learning_rate": 4.877327586206897e-06, "loss": 0.2822, "step": 17125 }, { "epoch": 0.17, "grad_norm": 18.37127113342285, "learning_rate": 4.876896551724138e-06, "loss": 0.46, "step": 17150 }, { "epoch": 0.17, "grad_norm": 13.870956420898438, "learning_rate": 4.87646551724138e-06, "loss": 0.2897, "step": 17175 }, { "epoch": 0.17, "grad_norm": 17.449155807495117, "learning_rate": 4.876034482758621e-06, "loss": 0.5585, "step": 17200 }, { "epoch": 0.17, "grad_norm": 16.6773738861084, "learning_rate": 4.875603448275863e-06, "loss": 0.2735, "step": 17225 }, { "epoch": 0.17, "grad_norm": 18.930505752563477, "learning_rate": 4.875172413793104e-06, "loss": 0.5095, "step": 17250 }, { "epoch": 0.17, "grad_norm": 14.482077598571777, "learning_rate": 4.874741379310346e-06, "loss": 0.2696, "step": 17275 }, { "epoch": 0.17, "grad_norm": 24.562732696533203, "learning_rate": 4.874310344827586e-06, "loss": 0.4918, "step": 17300 }, { "epoch": 0.17, "grad_norm": 10.62913703918457, "learning_rate": 4.873879310344828e-06, "loss": 0.2328, "step": 17325 }, { "epoch": 0.17, "grad_norm": 20.07575225830078, "learning_rate": 4.873448275862069e-06, "loss": 0.4987, "step": 17350 }, { "epoch": 0.17, "grad_norm": 12.391975402832031, "learning_rate": 4.873017241379311e-06, "loss": 0.2204, "step": 17375 }, { "epoch": 0.17, "grad_norm": 17.68683624267578, "learning_rate": 4.872586206896552e-06, "loss": 0.5021, "step": 17400 }, { "epoch": 0.17, "grad_norm": 14.097087860107422, "learning_rate": 4.8721551724137935e-06, "loss": 0.3049, "step": 17425 }, { "epoch": 0.17, "grad_norm": 20.811016082763672, "learning_rate": 4.871724137931035e-06, "loss": 0.4886, "step": 17450 }, { "epoch": 0.17, "grad_norm": 11.925214767456055, "learning_rate": 4.8712931034482764e-06, "loss": 0.2517, "step": 17475 }, { "epoch": 0.17, "grad_norm": 21.409170150756836, "learning_rate": 4.870862068965518e-06, "loss": 0.4685, "step": 17500 }, { "epoch": 0.17, "grad_norm": 14.471060752868652, "learning_rate": 4.8704310344827585e-06, "loss": 0.2785, "step": 17525 }, { "epoch": 0.17, "grad_norm": 17.812450408935547, "learning_rate": 4.87e-06, "loss": 0.4875, "step": 17550 }, { "epoch": 0.17, "grad_norm": 8.228697776794434, "learning_rate": 4.869568965517241e-06, "loss": 0.2647, "step": 17575 }, { "epoch": 0.17, "grad_norm": 24.505847930908203, "learning_rate": 4.869137931034483e-06, "loss": 0.4506, "step": 17600 }, { "epoch": 0.17, "grad_norm": 8.705809593200684, "learning_rate": 4.868706896551725e-06, "loss": 0.2244, "step": 17625 }, { "epoch": 0.17, "grad_norm": 27.288930892944336, "learning_rate": 4.868275862068966e-06, "loss": 0.4294, "step": 17650 }, { "epoch": 0.17, "grad_norm": 11.107038497924805, "learning_rate": 4.867844827586207e-06, "loss": 0.2599, "step": 17675 }, { "epoch": 0.17, "grad_norm": 21.10015106201172, "learning_rate": 4.867413793103449e-06, "loss": 0.507, "step": 17700 }, { "epoch": 0.17, "grad_norm": 33.9884033203125, "learning_rate": 4.86698275862069e-06, "loss": 0.2781, "step": 17725 }, { "epoch": 0.17, "grad_norm": 25.921133041381836, "learning_rate": 4.866551724137932e-06, "loss": 0.5244, "step": 17750 }, { "epoch": 0.17, "grad_norm": 10.02739429473877, "learning_rate": 4.866120689655173e-06, "loss": 0.2409, "step": 17775 }, { "epoch": 0.18, "grad_norm": 25.005924224853516, "learning_rate": 4.8656896551724145e-06, "loss": 0.4858, "step": 17800 }, { "epoch": 0.18, "grad_norm": 11.995366096496582, "learning_rate": 4.865258620689656e-06, "loss": 0.2993, "step": 17825 }, { "epoch": 0.18, "grad_norm": 19.42953109741211, "learning_rate": 4.8648275862068974e-06, "loss": 0.4187, "step": 17850 }, { "epoch": 0.18, "grad_norm": 9.772110939025879, "learning_rate": 4.864396551724138e-06, "loss": 0.223, "step": 17875 }, { "epoch": 0.18, "grad_norm": 16.11791229248047, "learning_rate": 4.8639655172413795e-06, "loss": 0.5213, "step": 17900 }, { "epoch": 0.18, "grad_norm": 11.50186538696289, "learning_rate": 4.863534482758621e-06, "loss": 0.2694, "step": 17925 }, { "epoch": 0.18, "grad_norm": 25.489192962646484, "learning_rate": 4.863103448275862e-06, "loss": 0.5077, "step": 17950 }, { "epoch": 0.18, "grad_norm": 15.507953643798828, "learning_rate": 4.862672413793104e-06, "loss": 0.2888, "step": 17975 }, { "epoch": 0.18, "grad_norm": 14.676325798034668, "learning_rate": 4.862241379310345e-06, "loss": 0.5245, "step": 18000 }, { "epoch": 0.18, "grad_norm": 9.554773330688477, "learning_rate": 4.861810344827587e-06, "loss": 0.2251, "step": 18025 }, { "epoch": 0.18, "grad_norm": 22.679410934448242, "learning_rate": 4.861379310344828e-06, "loss": 0.5289, "step": 18050 }, { "epoch": 0.18, "grad_norm": 8.281892776489258, "learning_rate": 4.86094827586207e-06, "loss": 0.2583, "step": 18075 }, { "epoch": 0.18, "grad_norm": 38.86891555786133, "learning_rate": 4.86051724137931e-06, "loss": 0.4936, "step": 18100 }, { "epoch": 0.18, "grad_norm": 9.176267623901367, "learning_rate": 4.860086206896552e-06, "loss": 0.2415, "step": 18125 }, { "epoch": 0.18, "grad_norm": 22.605369567871094, "learning_rate": 4.859655172413793e-06, "loss": 0.4598, "step": 18150 }, { "epoch": 0.18, "grad_norm": 15.79533576965332, "learning_rate": 4.859224137931035e-06, "loss": 0.2675, "step": 18175 }, { "epoch": 0.18, "grad_norm": 22.031721115112305, "learning_rate": 4.858793103448276e-06, "loss": 0.5116, "step": 18200 }, { "epoch": 0.18, "grad_norm": 8.246376037597656, "learning_rate": 4.858362068965518e-06, "loss": 0.2596, "step": 18225 }, { "epoch": 0.18, "grad_norm": 23.353422164916992, "learning_rate": 4.857931034482759e-06, "loss": 0.4607, "step": 18250 }, { "epoch": 0.18, "grad_norm": 13.99295711517334, "learning_rate": 4.8575000000000005e-06, "loss": 0.2384, "step": 18275 }, { "epoch": 0.18, "grad_norm": 22.65328598022461, "learning_rate": 4.857086206896552e-06, "loss": 0.462, "step": 18300 }, { "epoch": 0.18, "grad_norm": 7.985040664672852, "learning_rate": 4.856655172413793e-06, "loss": 0.2729, "step": 18325 }, { "epoch": 0.18, "grad_norm": 20.4624080657959, "learning_rate": 4.856224137931035e-06, "loss": 0.4722, "step": 18350 }, { "epoch": 0.18, "grad_norm": 17.183523178100586, "learning_rate": 4.855793103448276e-06, "loss": 0.2706, "step": 18375 }, { "epoch": 0.18, "grad_norm": 12.772433280944824, "learning_rate": 4.855362068965518e-06, "loss": 0.546, "step": 18400 }, { "epoch": 0.18, "grad_norm": 9.834966659545898, "learning_rate": 4.854931034482758e-06, "loss": 0.248, "step": 18425 }, { "epoch": 0.18, "grad_norm": 24.44236183166504, "learning_rate": 4.854500000000001e-06, "loss": 0.4773, "step": 18450 }, { "epoch": 0.18, "grad_norm": 17.294082641601562, "learning_rate": 4.854068965517242e-06, "loss": 0.2438, "step": 18475 }, { "epoch": 0.18, "grad_norm": 15.031556129455566, "learning_rate": 4.853637931034484e-06, "loss": 0.515, "step": 18500 }, { "epoch": 0.18, "grad_norm": 9.329778671264648, "learning_rate": 4.853206896551724e-06, "loss": 0.2288, "step": 18525 }, { "epoch": 0.18, "grad_norm": 22.542728424072266, "learning_rate": 4.852775862068966e-06, "loss": 0.4961, "step": 18550 }, { "epoch": 0.18, "grad_norm": 15.997159957885742, "learning_rate": 4.852344827586207e-06, "loss": 0.2524, "step": 18575 }, { "epoch": 0.18, "grad_norm": 23.6511287689209, "learning_rate": 4.851913793103449e-06, "loss": 0.4979, "step": 18600 }, { "epoch": 0.18, "grad_norm": 7.316786289215088, "learning_rate": 4.85148275862069e-06, "loss": 0.2084, "step": 18625 }, { "epoch": 0.18, "grad_norm": 14.833648681640625, "learning_rate": 4.8510517241379315e-06, "loss": 0.4281, "step": 18650 }, { "epoch": 0.18, "grad_norm": 7.127645492553711, "learning_rate": 4.850620689655173e-06, "loss": 0.2798, "step": 18675 }, { "epoch": 0.18, "grad_norm": 19.55740737915039, "learning_rate": 4.850189655172414e-06, "loss": 0.561, "step": 18700 }, { "epoch": 0.18, "grad_norm": 11.46512222290039, "learning_rate": 4.849758620689656e-06, "loss": 0.2822, "step": 18725 }, { "epoch": 0.18, "grad_norm": Infinity, "learning_rate": 4.849344827586207e-06, "loss": 0.447, "step": 18750 }, { "epoch": 0.18, "grad_norm": 7.168278694152832, "learning_rate": 4.848913793103449e-06, "loss": 0.2601, "step": 18775 }, { "epoch": 0.18, "grad_norm": 20.078847885131836, "learning_rate": 4.84848275862069e-06, "loss": 0.5105, "step": 18800 }, { "epoch": 0.19, "grad_norm": 9.314231872558594, "learning_rate": 4.848051724137932e-06, "loss": 0.2572, "step": 18825 }, { "epoch": 0.19, "grad_norm": 21.44048500061035, "learning_rate": 4.847620689655173e-06, "loss": 0.5137, "step": 18850 }, { "epoch": 0.19, "grad_norm": 5.724674224853516, "learning_rate": 4.847189655172414e-06, "loss": 0.2415, "step": 18875 }, { "epoch": 0.19, "grad_norm": 19.820171356201172, "learning_rate": 4.846758620689655e-06, "loss": 0.513, "step": 18900 }, { "epoch": 0.19, "grad_norm": 13.670063972473145, "learning_rate": 4.846327586206897e-06, "loss": 0.198, "step": 18925 }, { "epoch": 0.19, "grad_norm": 15.519083023071289, "learning_rate": 4.845896551724138e-06, "loss": 0.4356, "step": 18950 }, { "epoch": 0.19, "grad_norm": 8.70222282409668, "learning_rate": 4.84546551724138e-06, "loss": 0.2913, "step": 18975 }, { "epoch": 0.19, "grad_norm": 37.15939712524414, "learning_rate": 4.845034482758621e-06, "loss": 0.4769, "step": 19000 }, { "epoch": 0.19, "grad_norm": 13.047447204589844, "learning_rate": 4.8446034482758625e-06, "loss": 0.2361, "step": 19025 }, { "epoch": 0.19, "grad_norm": 21.19569969177246, "learning_rate": 4.844172413793104e-06, "loss": 0.4607, "step": 19050 }, { "epoch": 0.19, "grad_norm": 11.971068382263184, "learning_rate": 4.843741379310345e-06, "loss": 0.2223, "step": 19075 }, { "epoch": 0.19, "grad_norm": 22.13616180419922, "learning_rate": 4.843310344827586e-06, "loss": 0.5078, "step": 19100 }, { "epoch": 0.19, "grad_norm": 10.628336906433105, "learning_rate": 4.842879310344828e-06, "loss": 0.2537, "step": 19125 }, { "epoch": 0.19, "grad_norm": 16.230873107910156, "learning_rate": 4.84244827586207e-06, "loss": 0.4824, "step": 19150 }, { "epoch": 0.19, "grad_norm": 12.015116691589355, "learning_rate": 4.842017241379311e-06, "loss": 0.2597, "step": 19175 }, { "epoch": 0.19, "grad_norm": 15.558761596679688, "learning_rate": 4.841586206896552e-06, "loss": 0.5166, "step": 19200 }, { "epoch": 0.19, "grad_norm": 12.922188758850098, "learning_rate": 4.841155172413793e-06, "loss": 0.2487, "step": 19225 }, { "epoch": 0.19, "grad_norm": 16.717721939086914, "learning_rate": 4.840724137931035e-06, "loss": 0.409, "step": 19250 }, { "epoch": 0.19, "grad_norm": 13.480483055114746, "learning_rate": 4.840293103448276e-06, "loss": 0.2852, "step": 19275 }, { "epoch": 0.19, "grad_norm": 21.55359649658203, "learning_rate": 4.839862068965518e-06, "loss": 0.5078, "step": 19300 }, { "epoch": 0.19, "grad_norm": 14.426997184753418, "learning_rate": 4.839431034482759e-06, "loss": 0.2645, "step": 19325 }, { "epoch": 0.19, "grad_norm": 24.651124954223633, "learning_rate": 4.839000000000001e-06, "loss": 0.4423, "step": 19350 }, { "epoch": 0.19, "grad_norm": 13.202954292297363, "learning_rate": 4.838568965517242e-06, "loss": 0.2682, "step": 19375 }, { "epoch": 0.19, "grad_norm": 30.582162857055664, "learning_rate": 4.8381379310344835e-06, "loss": 0.4715, "step": 19400 }, { "epoch": 0.19, "grad_norm": 11.332019805908203, "learning_rate": 4.837706896551724e-06, "loss": 0.2561, "step": 19425 }, { "epoch": 0.19, "grad_norm": 16.934476852416992, "learning_rate": 4.8372758620689656e-06, "loss": 0.4753, "step": 19450 }, { "epoch": 0.19, "grad_norm": 19.562463760375977, "learning_rate": 4.836844827586207e-06, "loss": 0.2507, "step": 19475 }, { "epoch": 0.19, "grad_norm": 19.821258544921875, "learning_rate": 4.8364137931034485e-06, "loss": 0.538, "step": 19500 }, { "epoch": 0.19, "grad_norm": 11.521300315856934, "learning_rate": 4.83598275862069e-06, "loss": 0.2404, "step": 19525 }, { "epoch": 0.19, "grad_norm": 26.56728744506836, "learning_rate": 4.835551724137931e-06, "loss": 0.5161, "step": 19550 }, { "epoch": 0.19, "grad_norm": 11.617195129394531, "learning_rate": 4.835120689655173e-06, "loss": 0.2786, "step": 19575 }, { "epoch": 0.19, "grad_norm": 20.94119644165039, "learning_rate": 4.834689655172414e-06, "loss": 0.5049, "step": 19600 }, { "epoch": 0.19, "grad_norm": 9.802326202392578, "learning_rate": 4.834258620689656e-06, "loss": 0.243, "step": 19625 }, { "epoch": 0.19, "grad_norm": 20.462886810302734, "learning_rate": 4.833827586206896e-06, "loss": 0.4982, "step": 19650 }, { "epoch": 0.19, "grad_norm": 12.332572937011719, "learning_rate": 4.833396551724138e-06, "loss": 0.2836, "step": 19675 }, { "epoch": 0.19, "grad_norm": 18.483991622924805, "learning_rate": 4.83296551724138e-06, "loss": 0.4682, "step": 19700 }, { "epoch": 0.19, "grad_norm": 11.439650535583496, "learning_rate": 4.832534482758622e-06, "loss": 0.3029, "step": 19725 }, { "epoch": 0.19, "grad_norm": 25.50374984741211, "learning_rate": 4.832103448275862e-06, "loss": 0.5201, "step": 19750 }, { "epoch": 0.19, "grad_norm": 11.838266372680664, "learning_rate": 4.831672413793104e-06, "loss": 0.2578, "step": 19775 }, { "epoch": 0.19, "grad_norm": 22.79339599609375, "learning_rate": 4.831241379310345e-06, "loss": 0.494, "step": 19800 }, { "epoch": 0.19, "grad_norm": 9.522798538208008, "learning_rate": 4.8308103448275866e-06, "loss": 0.2917, "step": 19825 }, { "epoch": 0.2, "grad_norm": 21.346099853515625, "learning_rate": 4.830379310344828e-06, "loss": 0.4879, "step": 19850 }, { "epoch": 0.2, "grad_norm": 15.268062591552734, "learning_rate": 4.8299482758620695e-06, "loss": 0.2374, "step": 19875 }, { "epoch": 0.2, "grad_norm": 19.220205307006836, "learning_rate": 4.829517241379311e-06, "loss": 0.5087, "step": 19900 }, { "epoch": 0.2, "grad_norm": 13.423375129699707, "learning_rate": 4.829086206896552e-06, "loss": 0.2325, "step": 19925 }, { "epoch": 0.2, "grad_norm": 16.33538246154785, "learning_rate": 4.828655172413794e-06, "loss": 0.4484, "step": 19950 }, { "epoch": 0.2, "grad_norm": 13.844887733459473, "learning_rate": 4.8282241379310345e-06, "loss": 0.2254, "step": 19975 }, { "epoch": 0.2, "grad_norm": 19.621601104736328, "learning_rate": 4.827793103448276e-06, "loss": 0.4574, "step": 20000 }, { "epoch": 0.2, "eval_loss": 0.5338913202285767, "eval_runtime": 5851.7593, "eval_samples_per_second": 1.618, "eval_steps_per_second": 0.202, "eval_wer": 0.16305607416468165, "step": 20000 }, { "epoch": 0.2, "grad_norm": 14.685494422912598, "learning_rate": 4.827362068965517e-06, "loss": 0.3086, "step": 20025 }, { "epoch": 0.2, "grad_norm": 13.54391860961914, "learning_rate": 4.826931034482759e-06, "loss": 0.4934, "step": 20050 }, { "epoch": 0.2, "grad_norm": 10.239843368530273, "learning_rate": 4.8265e-06, "loss": 0.2529, "step": 20075 }, { "epoch": 0.2, "grad_norm": 15.576516151428223, "learning_rate": 4.826068965517242e-06, "loss": 0.4962, "step": 20100 }, { "epoch": 0.2, "grad_norm": 19.964025497436523, "learning_rate": 4.825637931034483e-06, "loss": 0.2218, "step": 20125 }, { "epoch": 0.2, "grad_norm": 27.849924087524414, "learning_rate": 4.825206896551725e-06, "loss": 0.5148, "step": 20150 }, { "epoch": 0.2, "grad_norm": 6.614483833312988, "learning_rate": 4.824775862068966e-06, "loss": 0.2673, "step": 20175 }, { "epoch": 0.2, "grad_norm": 15.071307182312012, "learning_rate": 4.824344827586207e-06, "loss": 0.4663, "step": 20200 }, { "epoch": 0.2, "grad_norm": 11.350460052490234, "learning_rate": 4.823913793103448e-06, "loss": 0.2645, "step": 20225 }, { "epoch": 0.2, "grad_norm": 22.313478469848633, "learning_rate": 4.82348275862069e-06, "loss": 0.4775, "step": 20250 }, { "epoch": 0.2, "grad_norm": 12.421259880065918, "learning_rate": 4.823051724137932e-06, "loss": 0.1995, "step": 20275 }, { "epoch": 0.2, "grad_norm": 15.25833797454834, "learning_rate": 4.822620689655173e-06, "loss": 0.5287, "step": 20300 }, { "epoch": 0.2, "grad_norm": 10.72403335571289, "learning_rate": 4.822189655172414e-06, "loss": 0.29, "step": 20325 }, { "epoch": 0.2, "grad_norm": 19.862319946289062, "learning_rate": 4.8217586206896555e-06, "loss": 0.4956, "step": 20350 }, { "epoch": 0.2, "grad_norm": 11.369314193725586, "learning_rate": 4.821327586206897e-06, "loss": 0.2323, "step": 20375 }, { "epoch": 0.2, "grad_norm": 23.099599838256836, "learning_rate": 4.820896551724138e-06, "loss": 0.5153, "step": 20400 }, { "epoch": 0.2, "grad_norm": 11.441277503967285, "learning_rate": 4.82046551724138e-06, "loss": 0.2302, "step": 20425 }, { "epoch": 0.2, "grad_norm": 16.24232292175293, "learning_rate": 4.820034482758621e-06, "loss": 0.469, "step": 20450 }, { "epoch": 0.2, "grad_norm": 12.968703269958496, "learning_rate": 4.819603448275863e-06, "loss": 0.2111, "step": 20475 }, { "epoch": 0.2, "grad_norm": 23.245580673217773, "learning_rate": 4.819172413793104e-06, "loss": 0.4687, "step": 20500 }, { "epoch": 0.2, "grad_norm": 10.258557319641113, "learning_rate": 4.818741379310346e-06, "loss": 0.2255, "step": 20525 }, { "epoch": 0.2, "grad_norm": 16.687952041625977, "learning_rate": 4.818310344827586e-06, "loss": 0.4359, "step": 20550 }, { "epoch": 0.2, "grad_norm": 11.323150634765625, "learning_rate": 4.817879310344828e-06, "loss": 0.2494, "step": 20575 }, { "epoch": 0.2, "grad_norm": 12.639399528503418, "learning_rate": 4.817448275862069e-06, "loss": 0.4511, "step": 20600 }, { "epoch": 0.2, "grad_norm": 9.003030776977539, "learning_rate": 4.817017241379311e-06, "loss": 0.2303, "step": 20625 }, { "epoch": 0.2, "grad_norm": 16.253631591796875, "learning_rate": 4.816586206896552e-06, "loss": 0.5093, "step": 20650 }, { "epoch": 0.2, "grad_norm": 13.101349830627441, "learning_rate": 4.8161551724137936e-06, "loss": 0.2551, "step": 20675 }, { "epoch": 0.2, "grad_norm": 12.774646759033203, "learning_rate": 4.815724137931035e-06, "loss": 0.4188, "step": 20700 }, { "epoch": 0.2, "grad_norm": 18.34294319152832, "learning_rate": 4.8152931034482765e-06, "loss": 0.2249, "step": 20725 }, { "epoch": 0.2, "grad_norm": 13.445023536682129, "learning_rate": 4.814862068965518e-06, "loss": 0.4745, "step": 20750 }, { "epoch": 0.2, "grad_norm": 16.149688720703125, "learning_rate": 4.8144310344827585e-06, "loss": 0.2677, "step": 20775 }, { "epoch": 0.2, "grad_norm": 15.304931640625, "learning_rate": 4.814e-06, "loss": 0.5675, "step": 20800 }, { "epoch": 0.2, "grad_norm": 10.208545684814453, "learning_rate": 4.8135689655172414e-06, "loss": 0.3077, "step": 20825 }, { "epoch": 0.21, "grad_norm": 16.828617095947266, "learning_rate": 4.813137931034484e-06, "loss": 0.4971, "step": 20850 }, { "epoch": 0.21, "grad_norm": 11.225972175598145, "learning_rate": 4.812706896551724e-06, "loss": 0.2534, "step": 20875 }, { "epoch": 0.21, "grad_norm": 16.523033142089844, "learning_rate": 4.812275862068966e-06, "loss": 0.5021, "step": 20900 }, { "epoch": 0.21, "grad_norm": 8.882723808288574, "learning_rate": 4.811844827586207e-06, "loss": 0.2392, "step": 20925 }, { "epoch": 0.21, "grad_norm": 19.810237884521484, "learning_rate": 4.8114310344827596e-06, "loss": 0.4747, "step": 20950 }, { "epoch": 0.21, "grad_norm": 13.99474811553955, "learning_rate": 4.811000000000001e-06, "loss": 0.2274, "step": 20975 }, { "epoch": 0.21, "grad_norm": 19.10816192626953, "learning_rate": 4.810568965517242e-06, "loss": 0.407, "step": 21000 }, { "epoch": 0.21, "grad_norm": 13.676546096801758, "learning_rate": 4.810137931034483e-06, "loss": 0.3069, "step": 21025 }, { "epoch": 0.21, "grad_norm": 23.849273681640625, "learning_rate": 4.8097068965517246e-06, "loss": 0.4371, "step": 21050 }, { "epoch": 0.21, "grad_norm": 12.083322525024414, "learning_rate": 4.809275862068966e-06, "loss": 0.2187, "step": 21075 }, { "epoch": 0.21, "grad_norm": 15.093894004821777, "learning_rate": 4.8088448275862075e-06, "loss": 0.4782, "step": 21100 }, { "epoch": 0.21, "grad_norm": 7.161066055297852, "learning_rate": 4.808413793103449e-06, "loss": 0.2367, "step": 21125 }, { "epoch": 0.21, "grad_norm": 30.060348510742188, "learning_rate": 4.80798275862069e-06, "loss": 0.5085, "step": 21150 }, { "epoch": 0.21, "grad_norm": 7.40391206741333, "learning_rate": 4.807551724137932e-06, "loss": 0.2529, "step": 21175 }, { "epoch": 0.21, "grad_norm": 14.378345489501953, "learning_rate": 4.8071206896551724e-06, "loss": 0.4314, "step": 21200 }, { "epoch": 0.21, "grad_norm": 15.115830421447754, "learning_rate": 4.806689655172414e-06, "loss": 0.24, "step": 21225 }, { "epoch": 0.21, "grad_norm": 23.81594467163086, "learning_rate": 4.806258620689655e-06, "loss": 0.5368, "step": 21250 }, { "epoch": 0.21, "grad_norm": 16.635215759277344, "learning_rate": 4.805827586206897e-06, "loss": 0.3011, "step": 21275 }, { "epoch": 0.21, "grad_norm": 17.567333221435547, "learning_rate": 4.805396551724138e-06, "loss": 0.5162, "step": 21300 }, { "epoch": 0.21, "grad_norm": 13.827303886413574, "learning_rate": 4.80496551724138e-06, "loss": 0.2768, "step": 21325 }, { "epoch": 0.21, "grad_norm": 24.882265090942383, "learning_rate": 4.804534482758621e-06, "loss": 0.5039, "step": 21350 }, { "epoch": 0.21, "grad_norm": 10.188840866088867, "learning_rate": 4.804103448275863e-06, "loss": 0.2935, "step": 21375 }, { "epoch": 0.21, "grad_norm": 25.519044876098633, "learning_rate": 4.803672413793104e-06, "loss": 0.5202, "step": 21400 }, { "epoch": 0.21, "grad_norm": 6.0671844482421875, "learning_rate": 4.803241379310345e-06, "loss": 0.2512, "step": 21425 }, { "epoch": 0.21, "grad_norm": 19.110586166381836, "learning_rate": 4.802810344827586e-06, "loss": 0.4759, "step": 21450 }, { "epoch": 0.21, "grad_norm": 5.798599720001221, "learning_rate": 4.802379310344828e-06, "loss": 0.2793, "step": 21475 }, { "epoch": 0.21, "grad_norm": 18.79014778137207, "learning_rate": 4.801948275862069e-06, "loss": 0.4786, "step": 21500 }, { "epoch": 0.21, "grad_norm": 8.414015769958496, "learning_rate": 4.801517241379311e-06, "loss": 0.2777, "step": 21525 }, { "epoch": 0.21, "grad_norm": 14.487330436706543, "learning_rate": 4.801086206896552e-06, "loss": 0.3788, "step": 21550 }, { "epoch": 0.21, "grad_norm": 13.683119773864746, "learning_rate": 4.8006551724137934e-06, "loss": 0.2637, "step": 21575 }, { "epoch": 0.21, "grad_norm": 22.74057388305664, "learning_rate": 4.800224137931035e-06, "loss": 0.5039, "step": 21600 }, { "epoch": 0.21, "grad_norm": 16.68633460998535, "learning_rate": 4.799793103448276e-06, "loss": 0.2583, "step": 21625 }, { "epoch": 0.21, "grad_norm": 22.823759078979492, "learning_rate": 4.799362068965517e-06, "loss": 0.4987, "step": 21650 }, { "epoch": 0.21, "grad_norm": 13.311071395874023, "learning_rate": 4.798931034482759e-06, "loss": 0.2703, "step": 21675 }, { "epoch": 0.21, "grad_norm": 13.8178129196167, "learning_rate": 4.798500000000001e-06, "loss": 0.4733, "step": 21700 }, { "epoch": 0.21, "grad_norm": 10.276342391967773, "learning_rate": 4.798068965517242e-06, "loss": 0.2075, "step": 21725 }, { "epoch": 0.21, "grad_norm": 22.5045166015625, "learning_rate": 4.797637931034484e-06, "loss": 0.5293, "step": 21750 }, { "epoch": 0.21, "grad_norm": 9.254807472229004, "learning_rate": 4.797206896551724e-06, "loss": 0.2186, "step": 21775 }, { "epoch": 0.21, "grad_norm": 22.8551082611084, "learning_rate": 4.796775862068966e-06, "loss": 0.4527, "step": 21800 }, { "epoch": 0.21, "grad_norm": 12.44238567352295, "learning_rate": 4.796344827586207e-06, "loss": 0.2608, "step": 21825 }, { "epoch": 0.21, "grad_norm": 18.436330795288086, "learning_rate": 4.795913793103449e-06, "loss": 0.4915, "step": 21850 }, { "epoch": 0.22, "grad_norm": 13.940589904785156, "learning_rate": 4.79548275862069e-06, "loss": 0.2601, "step": 21875 }, { "epoch": 0.22, "grad_norm": 24.34344482421875, "learning_rate": 4.7950517241379315e-06, "loss": 0.4538, "step": 21900 }, { "epoch": 0.22, "grad_norm": 15.215433120727539, "learning_rate": 4.794620689655173e-06, "loss": 0.2746, "step": 21925 }, { "epoch": 0.22, "grad_norm": 24.938337326049805, "learning_rate": 4.7941896551724144e-06, "loss": 0.4308, "step": 21950 }, { "epoch": 0.22, "grad_norm": 9.904004096984863, "learning_rate": 4.793758620689656e-06, "loss": 0.2341, "step": 21975 }, { "epoch": 0.22, "grad_norm": 23.187824249267578, "learning_rate": 4.7933275862068965e-06, "loss": 0.4694, "step": 22000 }, { "epoch": 0.22, "grad_norm": 9.03372573852539, "learning_rate": 4.792896551724138e-06, "loss": 0.2559, "step": 22025 }, { "epoch": 0.22, "grad_norm": 20.03733253479004, "learning_rate": 4.7924655172413794e-06, "loss": 0.4563, "step": 22050 }, { "epoch": 0.22, "grad_norm": 10.283503532409668, "learning_rate": 4.792034482758621e-06, "loss": 0.2502, "step": 22075 }, { "epoch": 0.22, "grad_norm": 26.55593490600586, "learning_rate": 4.791603448275862e-06, "loss": 0.431, "step": 22100 }, { "epoch": 0.22, "grad_norm": 16.93250846862793, "learning_rate": 4.791172413793104e-06, "loss": 0.2636, "step": 22125 }, { "epoch": 0.22, "grad_norm": 18.64247703552246, "learning_rate": 4.790741379310345e-06, "loss": 0.4324, "step": 22150 }, { "epoch": 0.22, "grad_norm": 9.267961502075195, "learning_rate": 4.790310344827587e-06, "loss": 0.2398, "step": 22175 }, { "epoch": 0.22, "grad_norm": 24.864166259765625, "learning_rate": 4.789879310344828e-06, "loss": 0.4561, "step": 22200 }, { "epoch": 0.22, "grad_norm": 7.015978813171387, "learning_rate": 4.789448275862069e-06, "loss": 0.286, "step": 22225 }, { "epoch": 0.22, "grad_norm": 21.783044815063477, "learning_rate": 4.789017241379311e-06, "loss": 0.4036, "step": 22250 }, { "epoch": 0.22, "grad_norm": 14.593618392944336, "learning_rate": 4.7885862068965525e-06, "loss": 0.332, "step": 22275 }, { "epoch": 0.22, "grad_norm": 21.453575134277344, "learning_rate": 4.788155172413794e-06, "loss": 0.4949, "step": 22300 }, { "epoch": 0.22, "grad_norm": 6.391585826873779, "learning_rate": 4.787724137931035e-06, "loss": 0.2351, "step": 22325 }, { "epoch": 0.22, "grad_norm": 18.33053970336914, "learning_rate": 4.787293103448276e-06, "loss": 0.5366, "step": 22350 }, { "epoch": 0.22, "grad_norm": 17.414331436157227, "learning_rate": 4.7868620689655175e-06, "loss": 0.3043, "step": 22375 }, { "epoch": 0.22, "grad_norm": 21.386911392211914, "learning_rate": 4.786431034482759e-06, "loss": 0.4996, "step": 22400 }, { "epoch": 0.22, "grad_norm": 9.172154426574707, "learning_rate": 4.7860000000000004e-06, "loss": 0.2773, "step": 22425 }, { "epoch": 0.22, "grad_norm": 18.754474639892578, "learning_rate": 4.785568965517242e-06, "loss": 0.3888, "step": 22450 }, { "epoch": 0.22, "grad_norm": 9.601747512817383, "learning_rate": 4.785137931034483e-06, "loss": 0.2415, "step": 22475 }, { "epoch": 0.22, "grad_norm": 20.27402687072754, "learning_rate": 4.784706896551725e-06, "loss": 0.4864, "step": 22500 }, { "epoch": 0.22, "grad_norm": 13.22774887084961, "learning_rate": 4.784275862068966e-06, "loss": 0.2628, "step": 22525 }, { "epoch": 0.22, "grad_norm": 15.279277801513672, "learning_rate": 4.783844827586207e-06, "loss": 0.4226, "step": 22550 }, { "epoch": 0.22, "grad_norm": 10.433187484741211, "learning_rate": 4.783413793103448e-06, "loss": 0.2604, "step": 22575 }, { "epoch": 0.22, "grad_norm": 20.676252365112305, "learning_rate": 4.78298275862069e-06, "loss": 0.469, "step": 22600 }, { "epoch": 0.22, "grad_norm": 14.994335174560547, "learning_rate": 4.782551724137931e-06, "loss": 0.2544, "step": 22625 }, { "epoch": 0.22, "grad_norm": 10.616777420043945, "learning_rate": 4.782120689655173e-06, "loss": 0.4601, "step": 22650 }, { "epoch": 0.22, "grad_norm": 9.158272743225098, "learning_rate": 4.781689655172414e-06, "loss": 0.2797, "step": 22675 }, { "epoch": 0.22, "grad_norm": 15.60029411315918, "learning_rate": 4.781258620689656e-06, "loss": 0.5141, "step": 22700 }, { "epoch": 0.22, "grad_norm": 11.500184059143066, "learning_rate": 4.780827586206897e-06, "loss": 0.3147, "step": 22725 }, { "epoch": 0.22, "grad_norm": 28.47826385498047, "learning_rate": 4.7803965517241385e-06, "loss": 0.453, "step": 22750 }, { "epoch": 0.22, "grad_norm": 5.073351860046387, "learning_rate": 4.779965517241379e-06, "loss": 0.2765, "step": 22775 }, { "epoch": 0.22, "grad_norm": 21.32666015625, "learning_rate": 4.779534482758621e-06, "loss": 0.4875, "step": 22800 }, { "epoch": 0.22, "grad_norm": 9.271270751953125, "learning_rate": 4.779103448275863e-06, "loss": 0.2055, "step": 22825 }, { "epoch": 0.22, "grad_norm": 19.355506896972656, "learning_rate": 4.778672413793104e-06, "loss": 0.4044, "step": 22850 }, { "epoch": 0.22, "grad_norm": 9.80508804321289, "learning_rate": 4.778241379310346e-06, "loss": 0.2627, "step": 22875 }, { "epoch": 0.23, "grad_norm": 21.486085891723633, "learning_rate": 4.777810344827586e-06, "loss": 0.4925, "step": 22900 }, { "epoch": 0.23, "grad_norm": 10.854759216308594, "learning_rate": 4.777379310344828e-06, "loss": 0.2606, "step": 22925 }, { "epoch": 0.23, "grad_norm": 17.541379928588867, "learning_rate": 4.776948275862069e-06, "loss": 0.4497, "step": 22950 }, { "epoch": 0.23, "grad_norm": 11.013031005859375, "learning_rate": 4.776517241379311e-06, "loss": 0.2797, "step": 22975 }, { "epoch": 0.23, "grad_norm": 30.229328155517578, "learning_rate": 4.776086206896552e-06, "loss": 0.515, "step": 23000 }, { "epoch": 0.23, "grad_norm": 15.260751724243164, "learning_rate": 4.775655172413794e-06, "loss": 0.2633, "step": 23025 }, { "epoch": 0.23, "grad_norm": 25.60558319091797, "learning_rate": 4.775224137931035e-06, "loss": 0.5119, "step": 23050 }, { "epoch": 0.23, "grad_norm": 14.064022064208984, "learning_rate": 4.774793103448277e-06, "loss": 0.234, "step": 23075 }, { "epoch": 0.23, "grad_norm": 14.47806167602539, "learning_rate": 4.774362068965518e-06, "loss": 0.3856, "step": 23100 }, { "epoch": 0.23, "grad_norm": 8.318930625915527, "learning_rate": 4.773931034482759e-06, "loss": 0.2699, "step": 23125 }, { "epoch": 0.23, "grad_norm": 16.92315673828125, "learning_rate": 4.7735e-06, "loss": 0.4991, "step": 23150 }, { "epoch": 0.23, "grad_norm": 7.869355201721191, "learning_rate": 4.773068965517242e-06, "loss": 0.2519, "step": 23175 }, { "epoch": 0.23, "grad_norm": 10.525557518005371, "learning_rate": 4.772637931034483e-06, "loss": 0.4615, "step": 23200 }, { "epoch": 0.23, "grad_norm": 10.902024269104004, "learning_rate": 4.7722068965517245e-06, "loss": 0.2283, "step": 23225 }, { "epoch": 0.23, "grad_norm": 20.273578643798828, "learning_rate": 4.771775862068966e-06, "loss": 0.4162, "step": 23250 }, { "epoch": 0.23, "grad_norm": 8.189269065856934, "learning_rate": 4.771344827586207e-06, "loss": 0.2281, "step": 23275 }, { "epoch": 0.23, "grad_norm": 18.184961318969727, "learning_rate": 4.770913793103449e-06, "loss": 0.5554, "step": 23300 }, { "epoch": 0.23, "grad_norm": 9.466983795166016, "learning_rate": 4.7704827586206895e-06, "loss": 0.2459, "step": 23325 }, { "epoch": 0.23, "grad_norm": 18.74635124206543, "learning_rate": 4.770051724137931e-06, "loss": 0.5565, "step": 23350 }, { "epoch": 0.23, "grad_norm": 15.55083179473877, "learning_rate": 4.769620689655172e-06, "loss": 0.2171, "step": 23375 }, { "epoch": 0.23, "grad_norm": 15.577181816101074, "learning_rate": 4.769189655172415e-06, "loss": 0.4807, "step": 23400 }, { "epoch": 0.23, "grad_norm": 13.81781005859375, "learning_rate": 4.768758620689656e-06, "loss": 0.3172, "step": 23425 }, { "epoch": 0.23, "grad_norm": 22.001220703125, "learning_rate": 4.768327586206897e-06, "loss": 0.4937, "step": 23450 }, { "epoch": 0.23, "grad_norm": 12.42264461517334, "learning_rate": 4.767896551724138e-06, "loss": 0.2545, "step": 23475 }, { "epoch": 0.23, "grad_norm": 17.339309692382812, "learning_rate": 4.76746551724138e-06, "loss": 0.503, "step": 23500 }, { "epoch": 0.23, "grad_norm": 10.121502876281738, "learning_rate": 4.767034482758621e-06, "loss": 0.2366, "step": 23525 }, { "epoch": 0.23, "grad_norm": 11.01052474975586, "learning_rate": 4.766603448275863e-06, "loss": 0.4424, "step": 23550 }, { "epoch": 0.23, "grad_norm": 8.181034088134766, "learning_rate": 4.766172413793104e-06, "loss": 0.2247, "step": 23575 }, { "epoch": 0.23, "grad_norm": 12.867955207824707, "learning_rate": 4.7657413793103455e-06, "loss": 0.431, "step": 23600 }, { "epoch": 0.23, "grad_norm": 18.68170928955078, "learning_rate": 4.765310344827587e-06, "loss": 0.2922, "step": 23625 }, { "epoch": 0.23, "grad_norm": 22.707796096801758, "learning_rate": 4.764879310344828e-06, "loss": 0.5354, "step": 23650 }, { "epoch": 0.23, "grad_norm": 12.441468238830566, "learning_rate": 4.764448275862069e-06, "loss": 0.2528, "step": 23675 }, { "epoch": 0.23, "grad_norm": 18.683025360107422, "learning_rate": 4.7640172413793105e-06, "loss": 0.4231, "step": 23700 }, { "epoch": 0.23, "grad_norm": 25.266096115112305, "learning_rate": 4.763586206896552e-06, "loss": 0.2869, "step": 23725 }, { "epoch": 0.23, "grad_norm": 16.032169342041016, "learning_rate": 4.763155172413793e-06, "loss": 0.443, "step": 23750 }, { "epoch": 0.23, "grad_norm": 10.779337882995605, "learning_rate": 4.762724137931035e-06, "loss": 0.273, "step": 23775 }, { "epoch": 0.23, "grad_norm": 18.97261619567871, "learning_rate": 4.762293103448276e-06, "loss": 0.4299, "step": 23800 }, { "epoch": 0.23, "grad_norm": 12.179319381713867, "learning_rate": 4.761862068965518e-06, "loss": 0.2497, "step": 23825 }, { "epoch": 0.23, "grad_norm": 24.385921478271484, "learning_rate": 4.761431034482759e-06, "loss": 0.5132, "step": 23850 }, { "epoch": 0.23, "grad_norm": 12.899528503417969, "learning_rate": 4.761000000000001e-06, "loss": 0.2635, "step": 23875 }, { "epoch": 0.24, "grad_norm": 17.377887725830078, "learning_rate": 4.760568965517241e-06, "loss": 0.5007, "step": 23900 }, { "epoch": 0.24, "grad_norm": 10.487576484680176, "learning_rate": 4.760137931034483e-06, "loss": 0.2303, "step": 23925 }, { "epoch": 0.24, "grad_norm": 26.119279861450195, "learning_rate": 4.759706896551724e-06, "loss": 0.5005, "step": 23950 }, { "epoch": 0.24, "grad_norm": 12.56003475189209, "learning_rate": 4.7592758620689665e-06, "loss": 0.2262, "step": 23975 }, { "epoch": 0.24, "grad_norm": 17.952495574951172, "learning_rate": 4.758844827586207e-06, "loss": 0.43, "step": 24000 }, { "epoch": 0.24, "grad_norm": 10.194031715393066, "learning_rate": 4.7584137931034486e-06, "loss": 0.2629, "step": 24025 }, { "epoch": 0.24, "grad_norm": 22.754911422729492, "learning_rate": 4.75798275862069e-06, "loss": 0.493, "step": 24050 }, { "epoch": 0.24, "grad_norm": 10.505803108215332, "learning_rate": 4.7575517241379315e-06, "loss": 0.2545, "step": 24075 }, { "epoch": 0.24, "grad_norm": 16.751405715942383, "learning_rate": 4.757120689655173e-06, "loss": 0.4788, "step": 24100 }, { "epoch": 0.24, "grad_norm": 12.702531814575195, "learning_rate": 4.756689655172414e-06, "loss": 0.269, "step": 24125 }, { "epoch": 0.24, "grad_norm": 17.973825454711914, "learning_rate": 4.756258620689656e-06, "loss": 0.4127, "step": 24150 }, { "epoch": 0.24, "grad_norm": 12.175528526306152, "learning_rate": 4.755827586206897e-06, "loss": 0.2541, "step": 24175 }, { "epoch": 0.24, "grad_norm": 24.188955307006836, "learning_rate": 4.755396551724139e-06, "loss": 0.4531, "step": 24200 }, { "epoch": 0.24, "grad_norm": 3.063077211380005, "learning_rate": 4.754965517241379e-06, "loss": 0.2664, "step": 24225 }, { "epoch": 0.24, "grad_norm": 22.603748321533203, "learning_rate": 4.754534482758621e-06, "loss": 0.442, "step": 24250 }, { "epoch": 0.24, "grad_norm": 14.451277732849121, "learning_rate": 4.754103448275862e-06, "loss": 0.2553, "step": 24275 }, { "epoch": 0.24, "grad_norm": 25.066574096679688, "learning_rate": 4.753672413793104e-06, "loss": 0.4924, "step": 24300 }, { "epoch": 0.24, "grad_norm": 15.331714630126953, "learning_rate": 4.753241379310345e-06, "loss": 0.3463, "step": 24325 }, { "epoch": 0.24, "grad_norm": 15.215587615966797, "learning_rate": 4.752810344827587e-06, "loss": 0.4473, "step": 24350 }, { "epoch": 0.24, "grad_norm": 7.66945219039917, "learning_rate": 4.752379310344828e-06, "loss": 0.2682, "step": 24375 }, { "epoch": 0.24, "grad_norm": 17.169939041137695, "learning_rate": 4.7519482758620696e-06, "loss": 0.4549, "step": 24400 }, { "epoch": 0.24, "grad_norm": 13.242311477661133, "learning_rate": 4.751517241379311e-06, "loss": 0.2347, "step": 24425 }, { "epoch": 0.24, "grad_norm": 19.71095085144043, "learning_rate": 4.751086206896552e-06, "loss": 0.5577, "step": 24450 }, { "epoch": 0.24, "grad_norm": 12.312201499938965, "learning_rate": 4.750655172413793e-06, "loss": 0.3029, "step": 24475 }, { "epoch": 0.24, "grad_norm": 20.33350372314453, "learning_rate": 4.7502241379310345e-06, "loss": 0.5088, "step": 24500 }, { "epoch": 0.24, "grad_norm": 20.293628692626953, "learning_rate": 4.749793103448276e-06, "loss": 0.2573, "step": 24525 }, { "epoch": 0.24, "grad_norm": 16.119831085205078, "learning_rate": 4.749362068965518e-06, "loss": 0.4283, "step": 24550 }, { "epoch": 0.24, "grad_norm": 15.21661376953125, "learning_rate": 4.748931034482759e-06, "loss": 0.2521, "step": 24575 }, { "epoch": 0.24, "grad_norm": 17.359405517578125, "learning_rate": 4.7485e-06, "loss": 0.3753, "step": 24600 }, { "epoch": 0.24, "grad_norm": 12.009432792663574, "learning_rate": 4.748068965517242e-06, "loss": 0.2894, "step": 24625 }, { "epoch": 0.24, "grad_norm": 9.534246444702148, "learning_rate": 4.747637931034483e-06, "loss": 0.3943, "step": 24650 }, { "epoch": 0.24, "grad_norm": 14.282201766967773, "learning_rate": 4.747206896551724e-06, "loss": 0.2505, "step": 24675 }, { "epoch": 0.24, "grad_norm": 19.30034828186035, "learning_rate": 4.746775862068966e-06, "loss": 0.4347, "step": 24700 }, { "epoch": 0.24, "grad_norm": 13.522390365600586, "learning_rate": 4.746344827586208e-06, "loss": 0.2669, "step": 24725 }, { "epoch": 0.24, "grad_norm": 12.77908992767334, "learning_rate": 4.745913793103449e-06, "loss": 0.4519, "step": 24750 }, { "epoch": 0.24, "grad_norm": 7.882726669311523, "learning_rate": 4.7454827586206906e-06, "loss": 0.2223, "step": 24775 }, { "epoch": 0.24, "grad_norm": 10.037403106689453, "learning_rate": 4.745051724137931e-06, "loss": 0.5145, "step": 24800 }, { "epoch": 0.24, "grad_norm": 8.395181655883789, "learning_rate": 4.744620689655173e-06, "loss": 0.2405, "step": 24825 }, { "epoch": 0.24, "grad_norm": 49.55900192260742, "learning_rate": 4.744189655172414e-06, "loss": 0.4324, "step": 24850 }, { "epoch": 0.24, "grad_norm": 14.86780834197998, "learning_rate": 4.7437586206896555e-06, "loss": 0.2507, "step": 24875 }, { "epoch": 0.24, "grad_norm": 19.207414627075195, "learning_rate": 4.743327586206897e-06, "loss": 0.443, "step": 24900 }, { "epoch": 0.25, "grad_norm": 9.106844902038574, "learning_rate": 4.7428965517241384e-06, "loss": 0.2776, "step": 24925 }, { "epoch": 0.25, "grad_norm": Infinity, "learning_rate": 4.74248275862069e-06, "loss": 0.4414, "step": 24950 }, { "epoch": 0.25, "grad_norm": 13.74856948852539, "learning_rate": 4.742051724137931e-06, "loss": 0.2236, "step": 24975 }, { "epoch": 0.25, "grad_norm": 19.308635711669922, "learning_rate": 4.741620689655173e-06, "loss": 0.4805, "step": 25000 }, { "epoch": 0.25, "grad_norm": 7.974446773529053, "learning_rate": 4.741189655172414e-06, "loss": 0.2969, "step": 25025 }, { "epoch": 0.25, "grad_norm": 25.539945602416992, "learning_rate": 4.740758620689656e-06, "loss": 0.4619, "step": 25050 }, { "epoch": 0.25, "grad_norm": 13.385208129882812, "learning_rate": 4.740327586206897e-06, "loss": 0.2773, "step": 25075 }, { "epoch": 0.25, "grad_norm": 26.361249923706055, "learning_rate": 4.739896551724139e-06, "loss": 0.4708, "step": 25100 }, { "epoch": 0.25, "grad_norm": 7.390927791595459, "learning_rate": 4.739465517241379e-06, "loss": 0.1965, "step": 25125 }, { "epoch": 0.25, "grad_norm": Infinity, "learning_rate": 4.7390517241379316e-06, "loss": 0.5304, "step": 25150 }, { "epoch": 0.25, "grad_norm": 11.005501747131348, "learning_rate": 4.738620689655173e-06, "loss": 0.2529, "step": 25175 }, { "epoch": 0.25, "grad_norm": 17.13377571105957, "learning_rate": 4.7381896551724145e-06, "loss": 0.4297, "step": 25200 }, { "epoch": 0.25, "grad_norm": 3.6723008155822754, "learning_rate": 4.737758620689655e-06, "loss": 0.1986, "step": 25225 }, { "epoch": 0.25, "grad_norm": 65.72669219970703, "learning_rate": 4.7373275862068965e-06, "loss": 0.4578, "step": 25250 }, { "epoch": 0.25, "grad_norm": 4.446256160736084, "learning_rate": 4.736896551724138e-06, "loss": 0.2584, "step": 25275 }, { "epoch": 0.25, "grad_norm": 29.664438247680664, "learning_rate": 4.7364655172413795e-06, "loss": 0.415, "step": 25300 }, { "epoch": 0.25, "grad_norm": 11.30808162689209, "learning_rate": 4.736034482758621e-06, "loss": 0.3029, "step": 25325 }, { "epoch": 0.25, "grad_norm": 21.538360595703125, "learning_rate": 4.735603448275862e-06, "loss": 0.4359, "step": 25350 }, { "epoch": 0.25, "grad_norm": 10.68735408782959, "learning_rate": 4.735172413793104e-06, "loss": 0.2309, "step": 25375 }, { "epoch": 0.25, "grad_norm": 15.548359870910645, "learning_rate": 4.734741379310345e-06, "loss": 0.4728, "step": 25400 }, { "epoch": 0.25, "grad_norm": 13.712284088134766, "learning_rate": 4.734310344827587e-06, "loss": 0.219, "step": 25425 }, { "epoch": 0.25, "grad_norm": 20.793094635009766, "learning_rate": 4.733879310344827e-06, "loss": 0.4973, "step": 25450 }, { "epoch": 0.25, "grad_norm": 10.023950576782227, "learning_rate": 4.73344827586207e-06, "loss": 0.2304, "step": 25475 }, { "epoch": 0.25, "grad_norm": 20.916086196899414, "learning_rate": 4.733017241379311e-06, "loss": 0.4222, "step": 25500 }, { "epoch": 0.25, "grad_norm": 10.14462661743164, "learning_rate": 4.7325862068965526e-06, "loss": 0.2248, "step": 25525 }, { "epoch": 0.25, "grad_norm": 13.760385513305664, "learning_rate": 4.732155172413794e-06, "loss": 0.4545, "step": 25550 }, { "epoch": 0.25, "grad_norm": 8.643594741821289, "learning_rate": 4.731724137931035e-06, "loss": 0.239, "step": 25575 }, { "epoch": 0.25, "grad_norm": 17.470624923706055, "learning_rate": 4.731293103448276e-06, "loss": 0.4634, "step": 25600 }, { "epoch": 0.25, "grad_norm": 8.171894073486328, "learning_rate": 4.7308620689655175e-06, "loss": 0.2147, "step": 25625 }, { "epoch": 0.25, "grad_norm": 10.848057746887207, "learning_rate": 4.730431034482759e-06, "loss": 0.4467, "step": 25650 }, { "epoch": 0.25, "grad_norm": 15.066641807556152, "learning_rate": 4.7300000000000005e-06, "loss": 0.2287, "step": 25675 }, { "epoch": 0.25, "grad_norm": 9.863062858581543, "learning_rate": 4.729568965517242e-06, "loss": 0.4255, "step": 25700 }, { "epoch": 0.25, "grad_norm": 15.457619667053223, "learning_rate": 4.729137931034483e-06, "loss": 0.2264, "step": 25725 }, { "epoch": 0.25, "grad_norm": 18.73763084411621, "learning_rate": 4.728706896551725e-06, "loss": 0.3766, "step": 25750 }, { "epoch": 0.25, "grad_norm": 8.68080997467041, "learning_rate": 4.728275862068966e-06, "loss": 0.2134, "step": 25775 }, { "epoch": 0.25, "grad_norm": 23.72300148010254, "learning_rate": 4.727844827586207e-06, "loss": 0.4872, "step": 25800 }, { "epoch": 0.25, "grad_norm": 13.758463859558105, "learning_rate": 4.727413793103448e-06, "loss": 0.2612, "step": 25825 }, { "epoch": 0.25, "grad_norm": 17.740018844604492, "learning_rate": 4.72698275862069e-06, "loss": 0.47, "step": 25850 }, { "epoch": 0.25, "grad_norm": 10.864133834838867, "learning_rate": 4.726551724137931e-06, "loss": 0.2406, "step": 25875 }, { "epoch": 0.25, "grad_norm": 18.71234893798828, "learning_rate": 4.726120689655173e-06, "loss": 0.4611, "step": 25900 }, { "epoch": 0.25, "grad_norm": 5.946761608123779, "learning_rate": 4.725689655172414e-06, "loss": 0.2396, "step": 25925 }, { "epoch": 0.26, "grad_norm": 14.12573528289795, "learning_rate": 4.725258620689656e-06, "loss": 0.4817, "step": 25950 }, { "epoch": 0.26, "grad_norm": 20.08176612854004, "learning_rate": 4.724827586206897e-06, "loss": 0.2514, "step": 25975 }, { "epoch": 0.26, "grad_norm": 20.867385864257812, "learning_rate": 4.7243965517241385e-06, "loss": 0.4493, "step": 26000 }, { "epoch": 0.26, "grad_norm": 7.689480304718018, "learning_rate": 4.723965517241379e-06, "loss": 0.2537, "step": 26025 }, { "epoch": 0.26, "grad_norm": 21.291595458984375, "learning_rate": 4.7235344827586215e-06, "loss": 0.4188, "step": 26050 }, { "epoch": 0.26, "grad_norm": 13.723553657531738, "learning_rate": 4.723103448275863e-06, "loss": 0.2982, "step": 26075 }, { "epoch": 0.26, "grad_norm": 18.724674224853516, "learning_rate": 4.722672413793104e-06, "loss": 0.3898, "step": 26100 }, { "epoch": 0.26, "grad_norm": 13.497417449951172, "learning_rate": 4.722241379310345e-06, "loss": 0.2534, "step": 26125 }, { "epoch": 0.26, "grad_norm": 16.645034790039062, "learning_rate": 4.7218103448275864e-06, "loss": 0.4799, "step": 26150 }, { "epoch": 0.26, "grad_norm": 7.082431316375732, "learning_rate": 4.721379310344828e-06, "loss": 0.2208, "step": 26175 }, { "epoch": 0.26, "grad_norm": 34.31421661376953, "learning_rate": 4.720948275862069e-06, "loss": 0.4544, "step": 26200 }, { "epoch": 0.26, "grad_norm": 11.577409744262695, "learning_rate": 4.720517241379311e-06, "loss": 0.2823, "step": 26225 }, { "epoch": 0.26, "grad_norm": 18.42605972290039, "learning_rate": 4.720086206896552e-06, "loss": 0.5514, "step": 26250 }, { "epoch": 0.26, "grad_norm": 10.647745132446289, "learning_rate": 4.719655172413794e-06, "loss": 0.2432, "step": 26275 }, { "epoch": 0.26, "grad_norm": 18.71998405456543, "learning_rate": 4.719224137931035e-06, "loss": 0.4119, "step": 26300 }, { "epoch": 0.26, "grad_norm": 10.471118927001953, "learning_rate": 4.718793103448277e-06, "loss": 0.2262, "step": 26325 }, { "epoch": 0.26, "grad_norm": 16.514795303344727, "learning_rate": 4.718362068965517e-06, "loss": 0.4033, "step": 26350 }, { "epoch": 0.26, "grad_norm": 14.856104850769043, "learning_rate": 4.717931034482759e-06, "loss": 0.2421, "step": 26375 }, { "epoch": 0.26, "grad_norm": 21.638111114501953, "learning_rate": 4.7175e-06, "loss": 0.5079, "step": 26400 }, { "epoch": 0.26, "grad_norm": 11.497940063476562, "learning_rate": 4.717068965517242e-06, "loss": 0.2928, "step": 26425 }, { "epoch": 0.26, "grad_norm": 18.91143226623535, "learning_rate": 4.716637931034483e-06, "loss": 0.5093, "step": 26450 }, { "epoch": 0.26, "grad_norm": 10.284409523010254, "learning_rate": 4.7162068965517245e-06, "loss": 0.2529, "step": 26475 }, { "epoch": 0.26, "grad_norm": 15.786381721496582, "learning_rate": 4.715775862068966e-06, "loss": 0.4793, "step": 26500 }, { "epoch": 0.26, "grad_norm": 7.3781585693359375, "learning_rate": 4.7153448275862074e-06, "loss": 0.2469, "step": 26525 }, { "epoch": 0.26, "grad_norm": 18.84273910522461, "learning_rate": 4.714913793103449e-06, "loss": 0.4131, "step": 26550 }, { "epoch": 0.26, "grad_norm": 17.06093406677246, "learning_rate": 4.7144827586206895e-06, "loss": 0.2532, "step": 26575 }, { "epoch": 0.26, "grad_norm": 10.584579467773438, "learning_rate": 4.714051724137931e-06, "loss": 0.3939, "step": 26600 }, { "epoch": 0.26, "grad_norm": 12.39176082611084, "learning_rate": 4.713620689655172e-06, "loss": 0.2453, "step": 26625 }, { "epoch": 0.26, "grad_norm": 19.53977394104004, "learning_rate": 4.713189655172415e-06, "loss": 0.4445, "step": 26650 }, { "epoch": 0.26, "grad_norm": 7.758911609649658, "learning_rate": 4.712758620689655e-06, "loss": 0.2041, "step": 26675 }, { "epoch": 0.26, "grad_norm": 14.981077194213867, "learning_rate": 4.712327586206897e-06, "loss": 0.4551, "step": 26700 }, { "epoch": 0.26, "grad_norm": 7.177206039428711, "learning_rate": 4.711896551724138e-06, "loss": 0.2514, "step": 26725 }, { "epoch": 0.26, "grad_norm": 19.47385597229004, "learning_rate": 4.71146551724138e-06, "loss": 0.4623, "step": 26750 }, { "epoch": 0.26, "grad_norm": 8.476311683654785, "learning_rate": 4.711034482758621e-06, "loss": 0.2088, "step": 26775 }, { "epoch": 0.26, "grad_norm": 17.605318069458008, "learning_rate": 4.710603448275863e-06, "loss": 0.4202, "step": 26800 }, { "epoch": 0.26, "grad_norm": 15.92267894744873, "learning_rate": 4.710172413793104e-06, "loss": 0.2446, "step": 26825 }, { "epoch": 0.26, "grad_norm": 19.0742244720459, "learning_rate": 4.7097413793103455e-06, "loss": 0.4676, "step": 26850 }, { "epoch": 0.26, "grad_norm": 7.497840404510498, "learning_rate": 4.709310344827587e-06, "loss": 0.2167, "step": 26875 }, { "epoch": 0.26, "grad_norm": 24.28984832763672, "learning_rate": 4.708879310344828e-06, "loss": 0.4685, "step": 26900 }, { "epoch": 0.26, "grad_norm": 8.256694793701172, "learning_rate": 4.708448275862069e-06, "loss": 0.2564, "step": 26925 }, { "epoch": 0.27, "grad_norm": 14.986788749694824, "learning_rate": 4.7080172413793105e-06, "loss": 0.4193, "step": 26950 }, { "epoch": 0.27, "grad_norm": 10.869905471801758, "learning_rate": 4.707586206896552e-06, "loss": 0.2365, "step": 26975 }, { "epoch": 0.27, "grad_norm": 20.38754653930664, "learning_rate": 4.707155172413793e-06, "loss": 0.4482, "step": 27000 }, { "epoch": 0.27, "grad_norm": 9.312156677246094, "learning_rate": 4.706724137931035e-06, "loss": 0.2166, "step": 27025 }, { "epoch": 0.27, "grad_norm": 19.994169235229492, "learning_rate": 4.706293103448276e-06, "loss": 0.446, "step": 27050 }, { "epoch": 0.27, "grad_norm": 6.953749179840088, "learning_rate": 4.705862068965518e-06, "loss": 0.2875, "step": 27075 }, { "epoch": 0.27, "grad_norm": 16.403623580932617, "learning_rate": 4.705431034482759e-06, "loss": 0.4255, "step": 27100 }, { "epoch": 0.27, "grad_norm": 11.709450721740723, "learning_rate": 4.705e-06, "loss": 0.2771, "step": 27125 }, { "epoch": 0.27, "grad_norm": 17.657835006713867, "learning_rate": 4.704568965517241e-06, "loss": 0.4389, "step": 27150 }, { "epoch": 0.27, "grad_norm": 16.38019561767578, "learning_rate": 4.704137931034483e-06, "loss": 0.2505, "step": 27175 }, { "epoch": 0.27, "grad_norm": 14.268684387207031, "learning_rate": 4.703706896551724e-06, "loss": 0.4244, "step": 27200 }, { "epoch": 0.27, "grad_norm": 19.340641021728516, "learning_rate": 4.7032758620689665e-06, "loss": 0.2661, "step": 27225 }, { "epoch": 0.27, "grad_norm": 19.525972366333008, "learning_rate": 4.702844827586207e-06, "loss": 0.4881, "step": 27250 }, { "epoch": 0.27, "grad_norm": 18.00792694091797, "learning_rate": 4.702413793103449e-06, "loss": 0.2285, "step": 27275 }, { "epoch": 0.27, "grad_norm": 21.488080978393555, "learning_rate": 4.70198275862069e-06, "loss": 0.463, "step": 27300 }, { "epoch": 0.27, "grad_norm": 9.75406265258789, "learning_rate": 4.7015517241379315e-06, "loss": 0.351, "step": 27325 }, { "epoch": 0.27, "grad_norm": 22.1614990234375, "learning_rate": 4.701120689655173e-06, "loss": 0.4658, "step": 27350 }, { "epoch": 0.27, "grad_norm": 14.036871910095215, "learning_rate": 4.700689655172414e-06, "loss": 0.219, "step": 27375 }, { "epoch": 0.27, "grad_norm": 15.400257110595703, "learning_rate": 4.700258620689656e-06, "loss": 0.3849, "step": 27400 }, { "epoch": 0.27, "grad_norm": 11.184530258178711, "learning_rate": 4.699827586206897e-06, "loss": 0.2381, "step": 27425 }, { "epoch": 0.27, "grad_norm": 17.83879852294922, "learning_rate": 4.699396551724139e-06, "loss": 0.4765, "step": 27450 }, { "epoch": 0.27, "grad_norm": 10.157797813415527, "learning_rate": 4.698965517241379e-06, "loss": 0.2094, "step": 27475 }, { "epoch": 0.27, "grad_norm": 16.596559524536133, "learning_rate": 4.698534482758621e-06, "loss": 0.481, "step": 27500 }, { "epoch": 0.27, "grad_norm": 15.776806831359863, "learning_rate": 4.698103448275862e-06, "loss": 0.2403, "step": 27525 }, { "epoch": 0.27, "grad_norm": 18.589139938354492, "learning_rate": 4.697672413793104e-06, "loss": 0.5083, "step": 27550 }, { "epoch": 0.27, "grad_norm": 11.412109375, "learning_rate": 4.697241379310345e-06, "loss": 0.266, "step": 27575 }, { "epoch": 0.27, "grad_norm": 22.57651710510254, "learning_rate": 4.696810344827587e-06, "loss": 0.435, "step": 27600 }, { "epoch": 0.27, "grad_norm": 12.073596000671387, "learning_rate": 4.696379310344828e-06, "loss": 0.259, "step": 27625 }, { "epoch": 0.27, "grad_norm": 22.816856384277344, "learning_rate": 4.69594827586207e-06, "loss": 0.4702, "step": 27650 }, { "epoch": 0.27, "grad_norm": 6.479723930358887, "learning_rate": 4.695517241379311e-06, "loss": 0.2161, "step": 27675 }, { "epoch": 0.27, "grad_norm": 16.52065658569336, "learning_rate": 4.695086206896552e-06, "loss": 0.4532, "step": 27700 }, { "epoch": 0.27, "grad_norm": 8.043344497680664, "learning_rate": 4.694655172413793e-06, "loss": 0.2304, "step": 27725 }, { "epoch": 0.27, "grad_norm": 11.821054458618164, "learning_rate": 4.6942241379310346e-06, "loss": 0.4503, "step": 27750 }, { "epoch": 0.27, "grad_norm": 13.472885131835938, "learning_rate": 4.693793103448276e-06, "loss": 0.2181, "step": 27775 }, { "epoch": 0.27, "grad_norm": 18.97551727294922, "learning_rate": 4.6933620689655175e-06, "loss": 0.5357, "step": 27800 }, { "epoch": 0.27, "grad_norm": 9.667357444763184, "learning_rate": 4.692931034482759e-06, "loss": 0.2228, "step": 27825 }, { "epoch": 0.27, "grad_norm": 14.543728828430176, "learning_rate": 4.6925e-06, "loss": 0.4406, "step": 27850 }, { "epoch": 0.27, "grad_norm": 11.255159378051758, "learning_rate": 4.692068965517242e-06, "loss": 0.2766, "step": 27875 }, { "epoch": 0.27, "grad_norm": 23.754789352416992, "learning_rate": 4.691637931034483e-06, "loss": 0.467, "step": 27900 }, { "epoch": 0.27, "grad_norm": 13.384151458740234, "learning_rate": 4.691206896551725e-06, "loss": 0.2329, "step": 27925 }, { "epoch": 0.27, "grad_norm": 17.042287826538086, "learning_rate": 4.690775862068966e-06, "loss": 0.4437, "step": 27950 }, { "epoch": 0.28, "grad_norm": 10.640833854675293, "learning_rate": 4.690344827586208e-06, "loss": 0.2734, "step": 27975 }, { "epoch": 0.28, "grad_norm": 14.50550365447998, "learning_rate": 4.689913793103449e-06, "loss": 0.518, "step": 28000 }, { "epoch": 0.28, "grad_norm": 12.705199241638184, "learning_rate": 4.68948275862069e-06, "loss": 0.2293, "step": 28025 }, { "epoch": 0.28, "grad_norm": 23.7604923248291, "learning_rate": 4.689051724137931e-06, "loss": 0.4411, "step": 28050 }, { "epoch": 0.28, "grad_norm": 13.641227722167969, "learning_rate": 4.688620689655173e-06, "loss": 0.277, "step": 28075 }, { "epoch": 0.28, "grad_norm": 19.688846588134766, "learning_rate": 4.688189655172414e-06, "loss": 0.488, "step": 28100 }, { "epoch": 0.28, "grad_norm": 9.323521614074707, "learning_rate": 4.6877586206896556e-06, "loss": 0.2216, "step": 28125 }, { "epoch": 0.28, "grad_norm": 19.83163070678711, "learning_rate": 4.687327586206897e-06, "loss": 0.4899, "step": 28150 }, { "epoch": 0.28, "grad_norm": 6.183542728424072, "learning_rate": 4.6868965517241385e-06, "loss": 0.2388, "step": 28175 }, { "epoch": 0.28, "grad_norm": 15.034454345703125, "learning_rate": 4.68646551724138e-06, "loss": 0.3716, "step": 28200 }, { "epoch": 0.28, "grad_norm": 8.660066604614258, "learning_rate": 4.686034482758621e-06, "loss": 0.1801, "step": 28225 }, { "epoch": 0.28, "grad_norm": Infinity, "learning_rate": 4.685620689655173e-06, "loss": 0.524, "step": 28250 }, { "epoch": 0.28, "grad_norm": 11.024003982543945, "learning_rate": 4.685189655172414e-06, "loss": 0.2738, "step": 28275 }, { "epoch": 0.28, "grad_norm": 20.379966735839844, "learning_rate": 4.684758620689656e-06, "loss": 0.462, "step": 28300 }, { "epoch": 0.28, "grad_norm": 12.873583793640137, "learning_rate": 4.684327586206897e-06, "loss": 0.2371, "step": 28325 }, { "epoch": 0.28, "grad_norm": 19.295974731445312, "learning_rate": 4.683896551724138e-06, "loss": 0.4215, "step": 28350 }, { "epoch": 0.28, "grad_norm": 7.805644512176514, "learning_rate": 4.683465517241379e-06, "loss": 0.2285, "step": 28375 }, { "epoch": 0.28, "grad_norm": 22.484054565429688, "learning_rate": 4.683034482758621e-06, "loss": 0.4865, "step": 28400 }, { "epoch": 0.28, "grad_norm": 8.097383499145508, "learning_rate": 4.682603448275862e-06, "loss": 0.2267, "step": 28425 }, { "epoch": 0.28, "grad_norm": 17.517301559448242, "learning_rate": 4.682172413793104e-06, "loss": 0.4107, "step": 28450 }, { "epoch": 0.28, "grad_norm": 8.431361198425293, "learning_rate": 4.681741379310345e-06, "loss": 0.2234, "step": 28475 }, { "epoch": 0.28, "grad_norm": 19.184085845947266, "learning_rate": 4.6813103448275866e-06, "loss": 0.4182, "step": 28500 }, { "epoch": 0.28, "grad_norm": 11.792770385742188, "learning_rate": 4.680879310344828e-06, "loss": 0.2433, "step": 28525 }, { "epoch": 0.28, "grad_norm": 21.125961303710938, "learning_rate": 4.6804482758620695e-06, "loss": 0.4699, "step": 28550 }, { "epoch": 0.28, "grad_norm": 7.610108375549316, "learning_rate": 4.68001724137931e-06, "loss": 0.2382, "step": 28575 }, { "epoch": 0.28, "grad_norm": 26.54749298095703, "learning_rate": 4.6795862068965515e-06, "loss": 0.458, "step": 28600 }, { "epoch": 0.28, "grad_norm": 9.156508445739746, "learning_rate": 4.679155172413794e-06, "loss": 0.2406, "step": 28625 }, { "epoch": 0.28, "grad_norm": 16.79827308654785, "learning_rate": 4.678724137931035e-06, "loss": 0.4935, "step": 28650 }, { "epoch": 0.28, "grad_norm": 10.35114860534668, "learning_rate": 4.678293103448277e-06, "loss": 0.1923, "step": 28675 }, { "epoch": 0.28, "grad_norm": 13.495057106018066, "learning_rate": 4.677862068965517e-06, "loss": 0.4744, "step": 28700 }, { "epoch": 0.28, "grad_norm": 9.681108474731445, "learning_rate": 4.677431034482759e-06, "loss": 0.2187, "step": 28725 }, { "epoch": 0.28, "grad_norm": 21.442073822021484, "learning_rate": 4.677e-06, "loss": 0.3975, "step": 28750 }, { "epoch": 0.28, "grad_norm": 10.16470718383789, "learning_rate": 4.676568965517242e-06, "loss": 0.2401, "step": 28775 }, { "epoch": 0.28, "grad_norm": 21.99334716796875, "learning_rate": 4.676137931034483e-06, "loss": 0.4749, "step": 28800 }, { "epoch": 0.28, "grad_norm": 4.8118896484375, "learning_rate": 4.675706896551725e-06, "loss": 0.2557, "step": 28825 }, { "epoch": 0.28, "grad_norm": 24.184988021850586, "learning_rate": 4.675275862068966e-06, "loss": 0.5016, "step": 28850 }, { "epoch": 0.28, "grad_norm": 11.740754127502441, "learning_rate": 4.6748448275862076e-06, "loss": 0.2466, "step": 28875 }, { "epoch": 0.28, "grad_norm": 23.20452880859375, "learning_rate": 4.674413793103449e-06, "loss": 0.5157, "step": 28900 }, { "epoch": 0.28, "grad_norm": 12.252065658569336, "learning_rate": 4.67398275862069e-06, "loss": 0.2621, "step": 28925 }, { "epoch": 0.28, "grad_norm": 21.465118408203125, "learning_rate": 4.673551724137931e-06, "loss": 0.4341, "step": 28950 }, { "epoch": 0.28, "grad_norm": 9.590704917907715, "learning_rate": 4.6731206896551725e-06, "loss": 0.2304, "step": 28975 }, { "epoch": 0.29, "grad_norm": 16.81780433654785, "learning_rate": 4.672689655172414e-06, "loss": 0.4529, "step": 29000 }, { "epoch": 0.29, "grad_norm": 14.898069381713867, "learning_rate": 4.6722586206896555e-06, "loss": 0.2952, "step": 29025 }, { "epoch": 0.29, "grad_norm": 19.62278175354004, "learning_rate": 4.671827586206897e-06, "loss": 0.4186, "step": 29050 }, { "epoch": 0.29, "grad_norm": 9.529239654541016, "learning_rate": 4.671396551724138e-06, "loss": 0.274, "step": 29075 }, { "epoch": 0.29, "grad_norm": 14.61416244506836, "learning_rate": 4.67096551724138e-06, "loss": 0.4136, "step": 29100 }, { "epoch": 0.29, "grad_norm": 9.479650497436523, "learning_rate": 4.670534482758621e-06, "loss": 0.2335, "step": 29125 }, { "epoch": 0.29, "grad_norm": 20.85100555419922, "learning_rate": 4.670103448275862e-06, "loss": 0.4275, "step": 29150 }, { "epoch": 0.29, "grad_norm": 10.004756927490234, "learning_rate": 4.669672413793103e-06, "loss": 0.2362, "step": 29175 }, { "epoch": 0.29, "grad_norm": 16.8050594329834, "learning_rate": 4.669241379310346e-06, "loss": 0.3943, "step": 29200 }, { "epoch": 0.29, "grad_norm": 8.92764949798584, "learning_rate": 4.668810344827587e-06, "loss": 0.2225, "step": 29225 }, { "epoch": 0.29, "grad_norm": 10.049118995666504, "learning_rate": 4.668379310344828e-06, "loss": 0.4572, "step": 29250 }, { "epoch": 0.29, "grad_norm": 9.517155647277832, "learning_rate": 4.667948275862069e-06, "loss": 0.2447, "step": 29275 }, { "epoch": 0.29, "grad_norm": 13.780403137207031, "learning_rate": 4.667517241379311e-06, "loss": 0.403, "step": 29300 }, { "epoch": 0.29, "grad_norm": 12.136903762817383, "learning_rate": 4.667086206896552e-06, "loss": 0.2288, "step": 29325 }, { "epoch": 0.29, "grad_norm": 19.64506721496582, "learning_rate": 4.6666551724137935e-06, "loss": 0.4544, "step": 29350 }, { "epoch": 0.29, "grad_norm": 5.415404796600342, "learning_rate": 4.666224137931035e-06, "loss": 0.2429, "step": 29375 }, { "epoch": 0.29, "grad_norm": 13.443830490112305, "learning_rate": 4.6657931034482765e-06, "loss": 0.3866, "step": 29400 }, { "epoch": 0.29, "grad_norm": 12.780284881591797, "learning_rate": 4.665362068965518e-06, "loss": 0.2909, "step": 29425 }, { "epoch": 0.29, "grad_norm": 19.635631561279297, "learning_rate": 4.664931034482759e-06, "loss": 0.4217, "step": 29450 }, { "epoch": 0.29, "grad_norm": 11.784931182861328, "learning_rate": 4.6645e-06, "loss": 0.2113, "step": 29475 }, { "epoch": 0.29, "grad_norm": 17.67096710205078, "learning_rate": 4.6640689655172414e-06, "loss": 0.447, "step": 29500 }, { "epoch": 0.29, "grad_norm": 10.447591781616211, "learning_rate": 4.663637931034483e-06, "loss": 0.2415, "step": 29525 }, { "epoch": 0.29, "grad_norm": 14.987683296203613, "learning_rate": 4.663206896551724e-06, "loss": 0.4336, "step": 29550 }, { "epoch": 0.29, "grad_norm": 11.734210968017578, "learning_rate": 4.662775862068966e-06, "loss": 0.2837, "step": 29575 }, { "epoch": 0.29, "grad_norm": 21.497825622558594, "learning_rate": 4.662344827586207e-06, "loss": 0.3747, "step": 29600 }, { "epoch": 0.29, "grad_norm": 8.344207763671875, "learning_rate": 4.661913793103449e-06, "loss": 0.2305, "step": 29625 }, { "epoch": 0.29, "grad_norm": 19.54169273376465, "learning_rate": 4.66148275862069e-06, "loss": 0.4991, "step": 29650 }, { "epoch": 0.29, "grad_norm": 8.843050003051758, "learning_rate": 4.661051724137932e-06, "loss": 0.2365, "step": 29675 }, { "epoch": 0.29, "grad_norm": 10.026664733886719, "learning_rate": 4.660620689655172e-06, "loss": 0.4243, "step": 29700 }, { "epoch": 0.29, "grad_norm": 12.090875625610352, "learning_rate": 4.660189655172414e-06, "loss": 0.2412, "step": 29725 }, { "epoch": 0.29, "grad_norm": 22.055187225341797, "learning_rate": 4.659758620689655e-06, "loss": 0.4496, "step": 29750 }, { "epoch": 0.29, "grad_norm": 12.117634773254395, "learning_rate": 4.6593275862068975e-06, "loss": 0.2368, "step": 29775 }, { "epoch": 0.29, "grad_norm": 12.687292098999023, "learning_rate": 4.658896551724139e-06, "loss": 0.4117, "step": 29800 }, { "epoch": 0.29, "grad_norm": 10.25261116027832, "learning_rate": 4.6584655172413795e-06, "loss": 0.2478, "step": 29825 }, { "epoch": 0.29, "grad_norm": 19.413862228393555, "learning_rate": 4.658034482758621e-06, "loss": 0.4112, "step": 29850 }, { "epoch": 0.29, "grad_norm": 10.431156158447266, "learning_rate": 4.6576034482758624e-06, "loss": 0.232, "step": 29875 }, { "epoch": 0.29, "grad_norm": 20.928022384643555, "learning_rate": 4.657172413793104e-06, "loss": 0.4545, "step": 29900 }, { "epoch": 0.29, "grad_norm": 10.113378524780273, "learning_rate": 4.656741379310345e-06, "loss": 0.2394, "step": 29925 }, { "epoch": 0.29, "grad_norm": 13.610897064208984, "learning_rate": 4.656310344827587e-06, "loss": 0.4452, "step": 29950 }, { "epoch": 0.29, "grad_norm": 5.373023986816406, "learning_rate": 4.655879310344828e-06, "loss": 0.2419, "step": 29975 }, { "epoch": 0.3, "grad_norm": 13.912967681884766, "learning_rate": 4.65544827586207e-06, "loss": 0.4043, "step": 30000 }, { "epoch": 0.3, "grad_norm": 6.1425018310546875, "learning_rate": 4.655017241379311e-06, "loss": 0.2061, "step": 30025 }, { "epoch": 0.3, "grad_norm": 26.41990089416504, "learning_rate": 4.654586206896552e-06, "loss": 0.4703, "step": 30050 }, { "epoch": 0.3, "grad_norm": 11.428964614868164, "learning_rate": 4.654155172413793e-06, "loss": 0.2399, "step": 30075 }, { "epoch": 0.3, "grad_norm": 19.298551559448242, "learning_rate": 4.653724137931035e-06, "loss": 0.473, "step": 30100 }, { "epoch": 0.3, "grad_norm": 6.5105180740356445, "learning_rate": 4.653293103448276e-06, "loss": 0.2069, "step": 30125 }, { "epoch": 0.3, "grad_norm": 20.648603439331055, "learning_rate": 4.652862068965518e-06, "loss": 0.4661, "step": 30150 }, { "epoch": 0.3, "grad_norm": 13.023276329040527, "learning_rate": 4.652431034482759e-06, "loss": 0.2678, "step": 30175 }, { "epoch": 0.3, "grad_norm": 21.087146759033203, "learning_rate": 4.6520000000000005e-06, "loss": 0.4682, "step": 30200 }, { "epoch": 0.3, "grad_norm": 6.107218265533447, "learning_rate": 4.651568965517242e-06, "loss": 0.2511, "step": 30225 }, { "epoch": 0.3, "grad_norm": 13.207459449768066, "learning_rate": 4.6511379310344834e-06, "loss": 0.4172, "step": 30250 }, { "epoch": 0.3, "grad_norm": 10.652314186096191, "learning_rate": 4.650706896551724e-06, "loss": 0.218, "step": 30275 }, { "epoch": 0.3, "grad_norm": 13.49421215057373, "learning_rate": 4.6502758620689655e-06, "loss": 0.3601, "step": 30300 }, { "epoch": 0.3, "grad_norm": 14.047863006591797, "learning_rate": 4.649844827586207e-06, "loss": 0.2517, "step": 30325 }, { "epoch": 0.3, "grad_norm": 17.297988891601562, "learning_rate": 4.649413793103449e-06, "loss": 0.405, "step": 30350 }, { "epoch": 0.3, "grad_norm": 36.02064514160156, "learning_rate": 4.64898275862069e-06, "loss": 0.2246, "step": 30375 }, { "epoch": 0.3, "grad_norm": 17.666473388671875, "learning_rate": 4.648551724137931e-06, "loss": 0.4309, "step": 30400 }, { "epoch": 0.3, "grad_norm": 10.1343994140625, "learning_rate": 4.648120689655173e-06, "loss": 0.2635, "step": 30425 }, { "epoch": 0.3, "grad_norm": 24.228744506835938, "learning_rate": 4.647689655172414e-06, "loss": 0.4806, "step": 30450 }, { "epoch": 0.3, "grad_norm": 10.447992324829102, "learning_rate": 4.647258620689656e-06, "loss": 0.2374, "step": 30475 }, { "epoch": 0.3, "grad_norm": 16.631990432739258, "learning_rate": 4.646827586206897e-06, "loss": 0.4082, "step": 30500 }, { "epoch": 0.3, "grad_norm": 12.645363807678223, "learning_rate": 4.646396551724139e-06, "loss": 0.215, "step": 30525 }, { "epoch": 0.3, "grad_norm": 21.11025047302246, "learning_rate": 4.64596551724138e-06, "loss": 0.4962, "step": 30550 }, { "epoch": 0.3, "grad_norm": 6.348540782928467, "learning_rate": 4.6455344827586215e-06, "loss": 0.244, "step": 30575 }, { "epoch": 0.3, "grad_norm": 27.514192581176758, "learning_rate": 4.645103448275862e-06, "loss": 0.4381, "step": 30600 }, { "epoch": 0.3, "grad_norm": 15.239444732666016, "learning_rate": 4.644672413793104e-06, "loss": 0.2594, "step": 30625 }, { "epoch": 0.3, "grad_norm": 15.485838890075684, "learning_rate": 4.644241379310345e-06, "loss": 0.4757, "step": 30650 }, { "epoch": 0.3, "grad_norm": 16.044260025024414, "learning_rate": 4.6438103448275865e-06, "loss": 0.2273, "step": 30675 }, { "epoch": 0.3, "grad_norm": 19.430139541625977, "learning_rate": 4.643379310344828e-06, "loss": 0.4302, "step": 30700 }, { "epoch": 0.3, "grad_norm": 10.114975929260254, "learning_rate": 4.642948275862069e-06, "loss": 0.2878, "step": 30725 }, { "epoch": 0.3, "grad_norm": 20.289304733276367, "learning_rate": 4.642517241379311e-06, "loss": 0.374, "step": 30750 }, { "epoch": 0.3, "grad_norm": 8.737314224243164, "learning_rate": 4.642086206896552e-06, "loss": 0.2398, "step": 30775 }, { "epoch": 0.3, "grad_norm": 20.892263412475586, "learning_rate": 4.641655172413794e-06, "loss": 0.4355, "step": 30800 }, { "epoch": 0.3, "grad_norm": 11.888131141662598, "learning_rate": 4.641224137931034e-06, "loss": 0.2918, "step": 30825 }, { "epoch": 0.3, "grad_norm": 18.680269241333008, "learning_rate": 4.640793103448276e-06, "loss": 0.4133, "step": 30850 }, { "epoch": 0.3, "grad_norm": 15.121735572814941, "learning_rate": 4.640362068965517e-06, "loss": 0.2358, "step": 30875 }, { "epoch": 0.3, "grad_norm": 20.075641632080078, "learning_rate": 4.639931034482759e-06, "loss": 0.4893, "step": 30900 }, { "epoch": 0.3, "grad_norm": 9.736927032470703, "learning_rate": 4.6395e-06, "loss": 0.2856, "step": 30925 }, { "epoch": 0.3, "grad_norm": 21.11290740966797, "learning_rate": 4.639068965517242e-06, "loss": 0.5179, "step": 30950 }, { "epoch": 0.3, "grad_norm": 7.786890029907227, "learning_rate": 4.638637931034483e-06, "loss": 0.2176, "step": 30975 }, { "epoch": 0.3, "grad_norm": 29.528425216674805, "learning_rate": 4.638206896551725e-06, "loss": 0.4049, "step": 31000 }, { "epoch": 0.31, "grad_norm": 16.67816734313965, "learning_rate": 4.637775862068966e-06, "loss": 0.272, "step": 31025 }, { "epoch": 0.31, "grad_norm": 19.016613006591797, "learning_rate": 4.637344827586207e-06, "loss": 0.4438, "step": 31050 }, { "epoch": 0.31, "grad_norm": 7.978046417236328, "learning_rate": 4.636913793103449e-06, "loss": 0.2307, "step": 31075 }, { "epoch": 0.31, "grad_norm": 24.511001586914062, "learning_rate": 4.63648275862069e-06, "loss": 0.4776, "step": 31100 }, { "epoch": 0.31, "grad_norm": 8.306988716125488, "learning_rate": 4.636051724137932e-06, "loss": 0.2094, "step": 31125 }, { "epoch": 0.31, "grad_norm": 24.383163452148438, "learning_rate": 4.6356206896551725e-06, "loss": 0.4305, "step": 31150 }, { "epoch": 0.31, "grad_norm": 8.624475479125977, "learning_rate": 4.635189655172414e-06, "loss": 0.254, "step": 31175 }, { "epoch": 0.31, "grad_norm": 20.20587158203125, "learning_rate": 4.634758620689655e-06, "loss": 0.4376, "step": 31200 }, { "epoch": 0.31, "grad_norm": 11.219672203063965, "learning_rate": 4.634327586206897e-06, "loss": 0.2188, "step": 31225 }, { "epoch": 0.31, "grad_norm": 12.466395378112793, "learning_rate": 4.633896551724138e-06, "loss": 0.4754, "step": 31250 }, { "epoch": 0.31, "grad_norm": 21.20349884033203, "learning_rate": 4.63346551724138e-06, "loss": 0.2406, "step": 31275 }, { "epoch": 0.31, "grad_norm": 17.37843132019043, "learning_rate": 4.633034482758621e-06, "loss": 0.4326, "step": 31300 }, { "epoch": 0.31, "grad_norm": 12.33038330078125, "learning_rate": 4.632603448275863e-06, "loss": 0.2615, "step": 31325 }, { "epoch": 0.31, "grad_norm": 21.6293888092041, "learning_rate": 4.632172413793104e-06, "loss": 0.4074, "step": 31350 }, { "epoch": 0.31, "grad_norm": 10.321287155151367, "learning_rate": 4.631741379310345e-06, "loss": 0.2392, "step": 31375 }, { "epoch": 0.31, "grad_norm": 20.89072036743164, "learning_rate": 4.631310344827586e-06, "loss": 0.4083, "step": 31400 }, { "epoch": 0.31, "grad_norm": 9.06913948059082, "learning_rate": 4.630879310344828e-06, "loss": 0.2329, "step": 31425 }, { "epoch": 0.31, "grad_norm": 19.938716888427734, "learning_rate": 4.630448275862069e-06, "loss": 0.4083, "step": 31450 }, { "epoch": 0.31, "grad_norm": 6.835305690765381, "learning_rate": 4.6300172413793106e-06, "loss": 0.2116, "step": 31475 }, { "epoch": 0.31, "grad_norm": 17.466398239135742, "learning_rate": 4.629586206896552e-06, "loss": 0.4869, "step": 31500 }, { "epoch": 0.31, "grad_norm": 14.459651947021484, "learning_rate": 4.6291551724137935e-06, "loss": 0.2263, "step": 31525 }, { "epoch": 0.31, "grad_norm": 16.04737091064453, "learning_rate": 4.628724137931035e-06, "loss": 0.4053, "step": 31550 }, { "epoch": 0.31, "grad_norm": 15.81711483001709, "learning_rate": 4.628293103448276e-06, "loss": 0.2347, "step": 31575 }, { "epoch": 0.31, "grad_norm": 16.148740768432617, "learning_rate": 4.627862068965517e-06, "loss": 0.4121, "step": 31600 }, { "epoch": 0.31, "grad_norm": 7.762482643127441, "learning_rate": 4.6274310344827585e-06, "loss": 0.227, "step": 31625 }, { "epoch": 0.31, "grad_norm": 15.772491455078125, "learning_rate": 4.627000000000001e-06, "loss": 0.4876, "step": 31650 }, { "epoch": 0.31, "grad_norm": 15.080528259277344, "learning_rate": 4.626568965517242e-06, "loss": 0.2208, "step": 31675 }, { "epoch": 0.31, "grad_norm": 10.8023099899292, "learning_rate": 4.626137931034484e-06, "loss": 0.4529, "step": 31700 }, { "epoch": 0.31, "grad_norm": 10.393035888671875, "learning_rate": 4.625706896551724e-06, "loss": 0.1909, "step": 31725 }, { "epoch": 0.31, "grad_norm": 16.046236038208008, "learning_rate": 4.625275862068966e-06, "loss": 0.4848, "step": 31750 }, { "epoch": 0.31, "grad_norm": 13.070788383483887, "learning_rate": 4.624844827586207e-06, "loss": 0.2245, "step": 31775 }, { "epoch": 0.31, "grad_norm": 24.679264068603516, "learning_rate": 4.624413793103449e-06, "loss": 0.4523, "step": 31800 }, { "epoch": 0.31, "grad_norm": 8.948957443237305, "learning_rate": 4.62398275862069e-06, "loss": 0.2146, "step": 31825 }, { "epoch": 0.31, "grad_norm": 21.921157836914062, "learning_rate": 4.6235517241379316e-06, "loss": 0.4496, "step": 31850 }, { "epoch": 0.31, "grad_norm": 6.506702899932861, "learning_rate": 4.623120689655173e-06, "loss": 0.2324, "step": 31875 }, { "epoch": 0.31, "grad_norm": 18.744464874267578, "learning_rate": 4.6226896551724145e-06, "loss": 0.4646, "step": 31900 }, { "epoch": 0.31, "grad_norm": 13.210319519042969, "learning_rate": 4.622258620689656e-06, "loss": 0.2474, "step": 31925 }, { "epoch": 0.31, "grad_norm": 16.9099063873291, "learning_rate": 4.6218275862068965e-06, "loss": 0.388, "step": 31950 }, { "epoch": 0.31, "grad_norm": 17.47991180419922, "learning_rate": 4.621396551724138e-06, "loss": 0.2473, "step": 31975 }, { "epoch": 0.31, "grad_norm": 13.7235107421875, "learning_rate": 4.6209655172413795e-06, "loss": 0.4662, "step": 32000 }, { "epoch": 0.31, "grad_norm": 9.362832069396973, "learning_rate": 4.620534482758621e-06, "loss": 0.2394, "step": 32025 }, { "epoch": 0.32, "grad_norm": 10.167316436767578, "learning_rate": 4.620103448275862e-06, "loss": 0.4323, "step": 32050 }, { "epoch": 0.32, "grad_norm": 10.72275447845459, "learning_rate": 4.619672413793104e-06, "loss": 0.2531, "step": 32075 }, { "epoch": 0.32, "grad_norm": 19.404521942138672, "learning_rate": 4.619241379310345e-06, "loss": 0.4825, "step": 32100 }, { "epoch": 0.32, "grad_norm": 16.813501358032227, "learning_rate": 4.618810344827587e-06, "loss": 0.2211, "step": 32125 }, { "epoch": 0.32, "grad_norm": 22.748754501342773, "learning_rate": 4.618379310344828e-06, "loss": 0.4442, "step": 32150 }, { "epoch": 0.32, "grad_norm": 9.442584991455078, "learning_rate": 4.617948275862069e-06, "loss": 0.2218, "step": 32175 }, { "epoch": 0.32, "grad_norm": 20.500411987304688, "learning_rate": 4.61751724137931e-06, "loss": 0.4296, "step": 32200 }, { "epoch": 0.32, "grad_norm": 10.621275901794434, "learning_rate": 4.6170862068965526e-06, "loss": 0.2232, "step": 32225 }, { "epoch": 0.32, "grad_norm": 17.398677825927734, "learning_rate": 4.616655172413794e-06, "loss": 0.4111, "step": 32250 }, { "epoch": 0.32, "grad_norm": 7.477029323577881, "learning_rate": 4.616224137931035e-06, "loss": 0.2134, "step": 32275 }, { "epoch": 0.32, "grad_norm": 22.610807418823242, "learning_rate": 4.615810344827586e-06, "loss": 0.4025, "step": 32300 }, { "epoch": 0.32, "grad_norm": 13.62160873413086, "learning_rate": 4.615379310344828e-06, "loss": 0.2457, "step": 32325 }, { "epoch": 0.32, "grad_norm": 17.755901336669922, "learning_rate": 4.61494827586207e-06, "loss": 0.4455, "step": 32350 }, { "epoch": 0.32, "grad_norm": 10.496109008789062, "learning_rate": 4.6145172413793105e-06, "loss": 0.1991, "step": 32375 }, { "epoch": 0.32, "grad_norm": 18.266992568969727, "learning_rate": 4.614086206896552e-06, "loss": 0.4516, "step": 32400 }, { "epoch": 0.32, "grad_norm": 11.188334465026855, "learning_rate": 4.613655172413793e-06, "loss": 0.2525, "step": 32425 }, { "epoch": 0.32, "grad_norm": 19.53459930419922, "learning_rate": 4.613224137931035e-06, "loss": 0.3796, "step": 32450 }, { "epoch": 0.32, "grad_norm": 8.559452056884766, "learning_rate": 4.612793103448276e-06, "loss": 0.1975, "step": 32475 }, { "epoch": 0.32, "grad_norm": 12.392871856689453, "learning_rate": 4.612362068965518e-06, "loss": 0.4614, "step": 32500 }, { "epoch": 0.32, "grad_norm": 12.147369384765625, "learning_rate": 4.611931034482759e-06, "loss": 0.2489, "step": 32525 }, { "epoch": 0.32, "grad_norm": 19.352712631225586, "learning_rate": 4.611500000000001e-06, "loss": 0.4932, "step": 32550 }, { "epoch": 0.32, "grad_norm": 10.745396614074707, "learning_rate": 4.611068965517242e-06, "loss": 0.2356, "step": 32575 }, { "epoch": 0.32, "grad_norm": 20.157941818237305, "learning_rate": 4.610637931034483e-06, "loss": 0.4991, "step": 32600 }, { "epoch": 0.32, "grad_norm": 10.980483055114746, "learning_rate": 4.610206896551724e-06, "loss": 0.2287, "step": 32625 }, { "epoch": 0.32, "grad_norm": 20.84232521057129, "learning_rate": 4.609775862068966e-06, "loss": 0.4233, "step": 32650 }, { "epoch": 0.32, "grad_norm": 12.896757125854492, "learning_rate": 4.609344827586207e-06, "loss": 0.2602, "step": 32675 }, { "epoch": 0.32, "grad_norm": 18.6009464263916, "learning_rate": 4.6089137931034485e-06, "loss": 0.5269, "step": 32700 }, { "epoch": 0.32, "grad_norm": 9.072294235229492, "learning_rate": 4.60848275862069e-06, "loss": 0.2427, "step": 32725 }, { "epoch": 0.32, "grad_norm": 14.35698413848877, "learning_rate": 4.6080517241379315e-06, "loss": 0.4249, "step": 32750 }, { "epoch": 0.32, "grad_norm": 7.764148235321045, "learning_rate": 4.607620689655173e-06, "loss": 0.2007, "step": 32775 }, { "epoch": 0.32, "grad_norm": 17.953882217407227, "learning_rate": 4.607189655172414e-06, "loss": 0.4811, "step": 32800 }, { "epoch": 0.32, "grad_norm": 17.69788932800293, "learning_rate": 4.606758620689655e-06, "loss": 0.1847, "step": 32825 }, { "epoch": 0.32, "grad_norm": 19.06618881225586, "learning_rate": 4.6063275862068964e-06, "loss": 0.4254, "step": 32850 }, { "epoch": 0.32, "grad_norm": 10.816181182861328, "learning_rate": 4.605896551724138e-06, "loss": 0.2433, "step": 32875 }, { "epoch": 0.32, "grad_norm": 14.781259536743164, "learning_rate": 4.60546551724138e-06, "loss": 0.4575, "step": 32900 }, { "epoch": 0.32, "grad_norm": 11.182438850402832, "learning_rate": 4.605034482758622e-06, "loss": 0.2552, "step": 32925 }, { "epoch": 0.32, "grad_norm": 19.871673583984375, "learning_rate": 4.604603448275862e-06, "loss": 0.4774, "step": 32950 }, { "epoch": 0.32, "grad_norm": 4.526224613189697, "learning_rate": 4.604172413793104e-06, "loss": 0.2542, "step": 32975 }, { "epoch": 0.32, "grad_norm": 21.321109771728516, "learning_rate": 4.603741379310345e-06, "loss": 0.4936, "step": 33000 }, { "epoch": 0.32, "grad_norm": 13.108400344848633, "learning_rate": 4.603310344827587e-06, "loss": 0.2473, "step": 33025 }, { "epoch": 0.33, "grad_norm": 21.458839416503906, "learning_rate": 4.602879310344828e-06, "loss": 0.4267, "step": 33050 }, { "epoch": 0.33, "grad_norm": 8.571017265319824, "learning_rate": 4.6024482758620695e-06, "loss": 0.2372, "step": 33075 }, { "epoch": 0.33, "grad_norm": 21.11285972595215, "learning_rate": 4.602017241379311e-06, "loss": 0.3962, "step": 33100 }, { "epoch": 0.33, "grad_norm": 12.514413833618164, "learning_rate": 4.6015862068965525e-06, "loss": 0.2379, "step": 33125 }, { "epoch": 0.33, "grad_norm": 24.003267288208008, "learning_rate": 4.601155172413794e-06, "loss": 0.4337, "step": 33150 }, { "epoch": 0.33, "grad_norm": 8.566792488098145, "learning_rate": 4.6007241379310345e-06, "loss": 0.2428, "step": 33175 }, { "epoch": 0.33, "grad_norm": 18.705020904541016, "learning_rate": 4.600293103448276e-06, "loss": 0.4599, "step": 33200 }, { "epoch": 0.33, "grad_norm": 12.188066482543945, "learning_rate": 4.5998620689655174e-06, "loss": 0.246, "step": 33225 }, { "epoch": 0.33, "grad_norm": 19.027624130249023, "learning_rate": 4.599431034482759e-06, "loss": 0.4515, "step": 33250 }, { "epoch": 0.33, "grad_norm": 9.603379249572754, "learning_rate": 4.599e-06, "loss": 0.2333, "step": 33275 }, { "epoch": 0.33, "grad_norm": 17.246383666992188, "learning_rate": 4.598568965517242e-06, "loss": 0.4949, "step": 33300 }, { "epoch": 0.33, "grad_norm": 16.334169387817383, "learning_rate": 4.598137931034483e-06, "loss": 0.2222, "step": 33325 }, { "epoch": 0.33, "grad_norm": 19.76702880859375, "learning_rate": 4.597706896551725e-06, "loss": 0.486, "step": 33350 }, { "epoch": 0.33, "grad_norm": 10.450446128845215, "learning_rate": 4.597275862068966e-06, "loss": 0.2504, "step": 33375 }, { "epoch": 0.33, "grad_norm": 19.470735549926758, "learning_rate": 4.596844827586207e-06, "loss": 0.5036, "step": 33400 }, { "epoch": 0.33, "grad_norm": 6.975928783416748, "learning_rate": 4.596413793103448e-06, "loss": 0.2125, "step": 33425 }, { "epoch": 0.33, "grad_norm": 24.71648597717285, "learning_rate": 4.59598275862069e-06, "loss": 0.3976, "step": 33450 }, { "epoch": 0.33, "grad_norm": 13.640336036682129, "learning_rate": 4.595551724137932e-06, "loss": 0.2178, "step": 33475 }, { "epoch": 0.33, "grad_norm": 10.2199068069458, "learning_rate": 4.595120689655173e-06, "loss": 0.446, "step": 33500 }, { "epoch": 0.33, "grad_norm": 13.743054389953613, "learning_rate": 4.594689655172414e-06, "loss": 0.2245, "step": 33525 }, { "epoch": 0.33, "grad_norm": 22.230247497558594, "learning_rate": 4.5942586206896555e-06, "loss": 0.4159, "step": 33550 }, { "epoch": 0.33, "grad_norm": 17.857633590698242, "learning_rate": 4.593827586206897e-06, "loss": 0.2475, "step": 33575 }, { "epoch": 0.33, "grad_norm": 17.220199584960938, "learning_rate": 4.5933965517241384e-06, "loss": 0.3891, "step": 33600 }, { "epoch": 0.33, "grad_norm": 9.280318260192871, "learning_rate": 4.59296551724138e-06, "loss": 0.2059, "step": 33625 }, { "epoch": 0.33, "grad_norm": 16.50266456604004, "learning_rate": 4.592534482758621e-06, "loss": 0.3778, "step": 33650 }, { "epoch": 0.33, "grad_norm": 9.030854225158691, "learning_rate": 4.592103448275863e-06, "loss": 0.2447, "step": 33675 }, { "epoch": 0.33, "grad_norm": 20.926837921142578, "learning_rate": 4.591672413793104e-06, "loss": 0.4122, "step": 33700 }, { "epoch": 0.33, "grad_norm": 8.524828910827637, "learning_rate": 4.591241379310345e-06, "loss": 0.2194, "step": 33725 }, { "epoch": 0.33, "grad_norm": 15.60066032409668, "learning_rate": 4.590810344827586e-06, "loss": 0.5475, "step": 33750 }, { "epoch": 0.33, "grad_norm": 10.214031219482422, "learning_rate": 4.590379310344828e-06, "loss": 0.253, "step": 33775 }, { "epoch": 0.33, "grad_norm": 20.357423782348633, "learning_rate": 4.589948275862069e-06, "loss": 0.4307, "step": 33800 }, { "epoch": 0.33, "grad_norm": 16.04332160949707, "learning_rate": 4.589517241379311e-06, "loss": 0.2424, "step": 33825 }, { "epoch": 0.33, "grad_norm": 19.228965759277344, "learning_rate": 4.589086206896552e-06, "loss": 0.4572, "step": 33850 }, { "epoch": 0.33, "grad_norm": 12.988730430603027, "learning_rate": 4.588655172413794e-06, "loss": 0.2352, "step": 33875 }, { "epoch": 0.33, "grad_norm": 21.014814376831055, "learning_rate": 4.588224137931035e-06, "loss": 0.5016, "step": 33900 }, { "epoch": 0.33, "grad_norm": 10.381009101867676, "learning_rate": 4.5877931034482765e-06, "loss": 0.2022, "step": 33925 }, { "epoch": 0.33, "grad_norm": 16.235061645507812, "learning_rate": 4.587362068965517e-06, "loss": 0.4225, "step": 33950 }, { "epoch": 0.33, "grad_norm": 8.519170761108398, "learning_rate": 4.586931034482759e-06, "loss": 0.277, "step": 33975 }, { "epoch": 0.33, "grad_norm": 17.401416778564453, "learning_rate": 4.5865e-06, "loss": 0.4161, "step": 34000 }, { "epoch": 0.33, "grad_norm": 9.573519706726074, "learning_rate": 4.5860689655172415e-06, "loss": 0.2373, "step": 34025 }, { "epoch": 0.33, "grad_norm": 11.387260437011719, "learning_rate": 4.585637931034484e-06, "loss": 0.4891, "step": 34050 }, { "epoch": 0.34, "grad_norm": 13.718013763427734, "learning_rate": 4.585206896551724e-06, "loss": 0.2663, "step": 34075 }, { "epoch": 0.34, "grad_norm": 11.033133506774902, "learning_rate": 4.584775862068966e-06, "loss": 0.4541, "step": 34100 }, { "epoch": 0.34, "grad_norm": 12.905435562133789, "learning_rate": 4.584344827586207e-06, "loss": 0.2156, "step": 34125 }, { "epoch": 0.34, "grad_norm": 17.95785903930664, "learning_rate": 4.583913793103449e-06, "loss": 0.4583, "step": 34150 }, { "epoch": 0.34, "grad_norm": 7.016481399536133, "learning_rate": 4.583482758620689e-06, "loss": 0.2118, "step": 34175 }, { "epoch": 0.34, "grad_norm": 18.53436851501465, "learning_rate": 4.583051724137932e-06, "loss": 0.4305, "step": 34200 }, { "epoch": 0.34, "grad_norm": 11.027002334594727, "learning_rate": 4.582620689655173e-06, "loss": 0.2341, "step": 34225 }, { "epoch": 0.34, "grad_norm": 18.037078857421875, "learning_rate": 4.582189655172415e-06, "loss": 0.4567, "step": 34250 }, { "epoch": 0.34, "grad_norm": 5.517264366149902, "learning_rate": 4.581758620689656e-06, "loss": 0.243, "step": 34275 }, { "epoch": 0.34, "grad_norm": Infinity, "learning_rate": 4.5813448275862075e-06, "loss": 0.4535, "step": 34300 }, { "epoch": 0.34, "grad_norm": 6.954440593719482, "learning_rate": 4.580913793103449e-06, "loss": 0.2289, "step": 34325 }, { "epoch": 0.34, "grad_norm": 21.990610122680664, "learning_rate": 4.5804827586206904e-06, "loss": 0.4592, "step": 34350 }, { "epoch": 0.34, "grad_norm": 18.31471061706543, "learning_rate": 4.580051724137932e-06, "loss": 0.2503, "step": 34375 }, { "epoch": 0.34, "grad_norm": 13.39028549194336, "learning_rate": 4.5796206896551725e-06, "loss": 0.4725, "step": 34400 }, { "epoch": 0.34, "grad_norm": 5.893749237060547, "learning_rate": 4.579189655172414e-06, "loss": 0.2182, "step": 34425 }, { "epoch": 0.34, "grad_norm": 17.258174896240234, "learning_rate": 4.578758620689655e-06, "loss": 0.4401, "step": 34450 }, { "epoch": 0.34, "grad_norm": 7.028522491455078, "learning_rate": 4.578327586206897e-06, "loss": 0.2522, "step": 34475 }, { "epoch": 0.34, "grad_norm": 14.585461616516113, "learning_rate": 4.577896551724138e-06, "loss": 0.4038, "step": 34500 }, { "epoch": 0.34, "grad_norm": 12.922981262207031, "learning_rate": 4.57746551724138e-06, "loss": 0.22, "step": 34525 }, { "epoch": 0.34, "grad_norm": 16.701820373535156, "learning_rate": 4.577034482758621e-06, "loss": 0.4495, "step": 34550 }, { "epoch": 0.34, "grad_norm": 9.911236763000488, "learning_rate": 4.576603448275863e-06, "loss": 0.2537, "step": 34575 }, { "epoch": 0.34, "grad_norm": 22.58712387084961, "learning_rate": 4.576172413793104e-06, "loss": 0.4761, "step": 34600 }, { "epoch": 0.34, "grad_norm": 14.7594575881958, "learning_rate": 4.575741379310345e-06, "loss": 0.2469, "step": 34625 }, { "epoch": 0.34, "grad_norm": 22.24861717224121, "learning_rate": 4.575310344827586e-06, "loss": 0.3971, "step": 34650 }, { "epoch": 0.34, "grad_norm": 7.217987060546875, "learning_rate": 4.574879310344828e-06, "loss": 0.2237, "step": 34675 }, { "epoch": 0.34, "grad_norm": 16.784807205200195, "learning_rate": 4.574448275862069e-06, "loss": 0.4934, "step": 34700 }, { "epoch": 0.34, "grad_norm": 12.547991752624512, "learning_rate": 4.574017241379311e-06, "loss": 0.2781, "step": 34725 }, { "epoch": 0.34, "grad_norm": 19.348617553710938, "learning_rate": 4.573586206896552e-06, "loss": 0.3754, "step": 34750 }, { "epoch": 0.34, "grad_norm": 9.596561431884766, "learning_rate": 4.5731551724137935e-06, "loss": 0.2503, "step": 34775 }, { "epoch": 0.34, "grad_norm": 34.073890686035156, "learning_rate": 4.572724137931035e-06, "loss": 0.3865, "step": 34800 }, { "epoch": 0.34, "grad_norm": 9.97548770904541, "learning_rate": 4.572293103448276e-06, "loss": 0.26, "step": 34825 }, { "epoch": 0.34, "grad_norm": 15.445273399353027, "learning_rate": 4.571862068965517e-06, "loss": 0.4039, "step": 34850 }, { "epoch": 0.34, "grad_norm": 8.901451110839844, "learning_rate": 4.571431034482759e-06, "loss": 0.213, "step": 34875 }, { "epoch": 0.34, "grad_norm": 14.856849670410156, "learning_rate": 4.571000000000001e-06, "loss": 0.4811, "step": 34900 }, { "epoch": 0.34, "grad_norm": 10.186464309692383, "learning_rate": 4.570568965517242e-06, "loss": 0.232, "step": 34925 }, { "epoch": 0.34, "grad_norm": 19.420026779174805, "learning_rate": 4.570137931034483e-06, "loss": 0.375, "step": 34950 }, { "epoch": 0.34, "grad_norm": 10.647517204284668, "learning_rate": 4.569706896551724e-06, "loss": 0.2132, "step": 34975 }, { "epoch": 0.34, "grad_norm": 18.619325637817383, "learning_rate": 4.569275862068966e-06, "loss": 0.4602, "step": 35000 }, { "epoch": 0.34, "grad_norm": 18.49846649169922, "learning_rate": 4.568844827586207e-06, "loss": 0.2428, "step": 35025 }, { "epoch": 0.34, "grad_norm": 16.416194915771484, "learning_rate": 4.568413793103449e-06, "loss": 0.4627, "step": 35050 }, { "epoch": 0.34, "grad_norm": 7.371669769287109, "learning_rate": 4.56798275862069e-06, "loss": 0.2399, "step": 35075 }, { "epoch": 0.35, "grad_norm": 14.323856353759766, "learning_rate": 4.567551724137932e-06, "loss": 0.4161, "step": 35100 }, { "epoch": 0.35, "grad_norm": 4.8854498863220215, "learning_rate": 4.567120689655173e-06, "loss": 0.2567, "step": 35125 }, { "epoch": 0.35, "grad_norm": 26.51576042175293, "learning_rate": 4.5666896551724145e-06, "loss": 0.48, "step": 35150 }, { "epoch": 0.35, "grad_norm": 19.45383644104004, "learning_rate": 4.566258620689655e-06, "loss": 0.2211, "step": 35175 }, { "epoch": 0.35, "grad_norm": 13.257841110229492, "learning_rate": 4.5658275862068966e-06, "loss": 0.3925, "step": 35200 }, { "epoch": 0.35, "grad_norm": 7.743846416473389, "learning_rate": 4.565396551724138e-06, "loss": 0.2493, "step": 35225 }, { "epoch": 0.35, "grad_norm": 15.373819351196289, "learning_rate": 4.5649655172413795e-06, "loss": 0.43, "step": 35250 }, { "epoch": 0.35, "grad_norm": 10.170103073120117, "learning_rate": 4.564534482758621e-06, "loss": 0.2149, "step": 35275 }, { "epoch": 0.35, "grad_norm": 14.253900527954102, "learning_rate": 4.564103448275862e-06, "loss": 0.4263, "step": 35300 }, { "epoch": 0.35, "grad_norm": 8.57663631439209, "learning_rate": 4.563672413793104e-06, "loss": 0.1864, "step": 35325 }, { "epoch": 0.35, "grad_norm": 16.091428756713867, "learning_rate": 4.563241379310345e-06, "loss": 0.4008, "step": 35350 }, { "epoch": 0.35, "grad_norm": 6.473926067352295, "learning_rate": 4.562810344827587e-06, "loss": 0.2088, "step": 35375 }, { "epoch": 0.35, "grad_norm": 18.922487258911133, "learning_rate": 4.562379310344827e-06, "loss": 0.4512, "step": 35400 }, { "epoch": 0.35, "grad_norm": 7.153152942657471, "learning_rate": 4.561948275862069e-06, "loss": 0.3019, "step": 35425 }, { "epoch": 0.35, "grad_norm": 24.186861038208008, "learning_rate": 4.561517241379311e-06, "loss": 0.4707, "step": 35450 }, { "epoch": 0.35, "grad_norm": 5.184237480163574, "learning_rate": 4.561086206896553e-06, "loss": 0.2244, "step": 35475 }, { "epoch": 0.35, "grad_norm": 22.22921371459961, "learning_rate": 4.560655172413794e-06, "loss": 0.3851, "step": 35500 }, { "epoch": 0.35, "grad_norm": 9.635741233825684, "learning_rate": 4.560224137931035e-06, "loss": 0.2907, "step": 35525 }, { "epoch": 0.35, "grad_norm": 24.04785919189453, "learning_rate": 4.559793103448276e-06, "loss": 0.4496, "step": 35550 }, { "epoch": 0.35, "grad_norm": 18.419635772705078, "learning_rate": 4.5593620689655176e-06, "loss": 0.2535, "step": 35575 }, { "epoch": 0.35, "grad_norm": 12.079648971557617, "learning_rate": 4.558931034482759e-06, "loss": 0.429, "step": 35600 }, { "epoch": 0.35, "grad_norm": 11.46465015411377, "learning_rate": 4.5585000000000005e-06, "loss": 0.2372, "step": 35625 }, { "epoch": 0.35, "grad_norm": 15.472786903381348, "learning_rate": 4.558068965517242e-06, "loss": 0.3917, "step": 35650 }, { "epoch": 0.35, "grad_norm": 9.883108139038086, "learning_rate": 4.557637931034483e-06, "loss": 0.2708, "step": 35675 }, { "epoch": 0.35, "grad_norm": 15.133625030517578, "learning_rate": 4.557206896551725e-06, "loss": 0.5093, "step": 35700 }, { "epoch": 0.35, "grad_norm": 16.168542861938477, "learning_rate": 4.556775862068966e-06, "loss": 0.2297, "step": 35725 }, { "epoch": 0.35, "grad_norm": 15.143312454223633, "learning_rate": 4.556344827586207e-06, "loss": 0.43, "step": 35750 }, { "epoch": 0.35, "grad_norm": 10.832181930541992, "learning_rate": 4.555913793103448e-06, "loss": 0.2329, "step": 35775 }, { "epoch": 0.35, "grad_norm": 20.21942138671875, "learning_rate": 4.55548275862069e-06, "loss": 0.4561, "step": 35800 }, { "epoch": 0.35, "grad_norm": 18.532882690429688, "learning_rate": 4.555051724137931e-06, "loss": 0.2549, "step": 35825 }, { "epoch": 0.35, "grad_norm": 14.40105152130127, "learning_rate": 4.554620689655173e-06, "loss": 0.4788, "step": 35850 }, { "epoch": 0.35, "grad_norm": 7.7133049964904785, "learning_rate": 4.554189655172414e-06, "loss": 0.2639, "step": 35875 }, { "epoch": 0.35, "grad_norm": 10.675856590270996, "learning_rate": 4.553758620689656e-06, "loss": 0.4369, "step": 35900 }, { "epoch": 0.35, "grad_norm": 8.607451438903809, "learning_rate": 4.553327586206897e-06, "loss": 0.2455, "step": 35925 }, { "epoch": 0.35, "grad_norm": 12.59421443939209, "learning_rate": 4.5528965517241386e-06, "loss": 0.4224, "step": 35950 }, { "epoch": 0.35, "grad_norm": 16.40057945251465, "learning_rate": 4.552465517241379e-06, "loss": 0.2549, "step": 35975 }, { "epoch": 0.35, "grad_norm": 14.580326080322266, "learning_rate": 4.552034482758621e-06, "loss": 0.4048, "step": 36000 }, { "epoch": 0.35, "grad_norm": 12.86327838897705, "learning_rate": 4.551603448275863e-06, "loss": 0.2783, "step": 36025 }, { "epoch": 0.35, "grad_norm": 21.11622428894043, "learning_rate": 4.551172413793104e-06, "loss": 0.4312, "step": 36050 }, { "epoch": 0.35, "grad_norm": 9.655136108398438, "learning_rate": 4.550741379310345e-06, "loss": 0.2541, "step": 36075 }, { "epoch": 0.36, "grad_norm": 17.243911743164062, "learning_rate": 4.5503103448275865e-06, "loss": 0.4618, "step": 36100 }, { "epoch": 0.36, "grad_norm": 6.922300338745117, "learning_rate": 4.549879310344828e-06, "loss": 0.2341, "step": 36125 }, { "epoch": 0.36, "grad_norm": 13.227926254272461, "learning_rate": 4.549448275862069e-06, "loss": 0.4218, "step": 36150 }, { "epoch": 0.36, "grad_norm": 11.272886276245117, "learning_rate": 4.549017241379311e-06, "loss": 0.2488, "step": 36175 }, { "epoch": 0.36, "grad_norm": 21.24005126953125, "learning_rate": 4.548586206896552e-06, "loss": 0.4461, "step": 36200 }, { "epoch": 0.36, "grad_norm": 18.105485916137695, "learning_rate": 4.548155172413794e-06, "loss": 0.2734, "step": 36225 }, { "epoch": 0.36, "grad_norm": 39.76256561279297, "learning_rate": 4.547724137931035e-06, "loss": 0.4189, "step": 36250 }, { "epoch": 0.36, "grad_norm": 6.854281902313232, "learning_rate": 4.547293103448277e-06, "loss": 0.2462, "step": 36275 }, { "epoch": 0.36, "grad_norm": 10.067536354064941, "learning_rate": 4.546862068965517e-06, "loss": 0.472, "step": 36300 }, { "epoch": 0.36, "grad_norm": 13.623215675354004, "learning_rate": 4.546431034482759e-06, "loss": 0.2661, "step": 36325 }, { "epoch": 0.36, "grad_norm": Infinity, "learning_rate": 4.546017241379311e-06, "loss": 0.486, "step": 36350 }, { "epoch": 0.36, "grad_norm": 12.449553489685059, "learning_rate": 4.5455862068965525e-06, "loss": 0.2241, "step": 36375 }, { "epoch": 0.36, "grad_norm": 16.05875587463379, "learning_rate": 4.545155172413793e-06, "loss": 0.4223, "step": 36400 }, { "epoch": 0.36, "grad_norm": 14.277900695800781, "learning_rate": 4.5447241379310346e-06, "loss": 0.2637, "step": 36425 }, { "epoch": 0.36, "grad_norm": 28.767972946166992, "learning_rate": 4.544293103448276e-06, "loss": 0.4872, "step": 36450 }, { "epoch": 0.36, "grad_norm": 6.966751575469971, "learning_rate": 4.5438620689655175e-06, "loss": 0.2793, "step": 36475 }, { "epoch": 0.36, "grad_norm": 11.752128601074219, "learning_rate": 4.543431034482759e-06, "loss": 0.4324, "step": 36500 }, { "epoch": 0.36, "grad_norm": 13.87728214263916, "learning_rate": 4.543e-06, "loss": 0.2508, "step": 36525 }, { "epoch": 0.36, "grad_norm": 14.561283111572266, "learning_rate": 4.542568965517242e-06, "loss": 0.4979, "step": 36550 }, { "epoch": 0.36, "grad_norm": 10.967752456665039, "learning_rate": 4.542137931034483e-06, "loss": 0.3174, "step": 36575 }, { "epoch": 0.36, "grad_norm": 17.224254608154297, "learning_rate": 4.541706896551725e-06, "loss": 0.3769, "step": 36600 }, { "epoch": 0.36, "grad_norm": 12.548196792602539, "learning_rate": 4.541275862068965e-06, "loss": 0.2423, "step": 36625 }, { "epoch": 0.36, "grad_norm": 21.904245376586914, "learning_rate": 4.540844827586207e-06, "loss": 0.4618, "step": 36650 }, { "epoch": 0.36, "grad_norm": 8.165678977966309, "learning_rate": 4.540413793103448e-06, "loss": 0.2288, "step": 36675 }, { "epoch": 0.36, "grad_norm": 20.360736846923828, "learning_rate": 4.5399827586206906e-06, "loss": 0.3906, "step": 36700 }, { "epoch": 0.36, "grad_norm": 19.429603576660156, "learning_rate": 4.539551724137932e-06, "loss": 0.2839, "step": 36725 }, { "epoch": 0.36, "grad_norm": 17.791460037231445, "learning_rate": 4.539120689655173e-06, "loss": 0.4771, "step": 36750 }, { "epoch": 0.36, "grad_norm": 9.232378005981445, "learning_rate": 4.538689655172414e-06, "loss": 0.2753, "step": 36775 }, { "epoch": 0.36, "grad_norm": 23.031705856323242, "learning_rate": 4.5382586206896556e-06, "loss": 0.4623, "step": 36800 }, { "epoch": 0.36, "grad_norm": 12.802433967590332, "learning_rate": 4.537827586206897e-06, "loss": 0.2276, "step": 36825 }, { "epoch": 0.36, "grad_norm": 21.896512985229492, "learning_rate": 4.5373965517241385e-06, "loss": 0.4631, "step": 36850 }, { "epoch": 0.36, "grad_norm": 9.733945846557617, "learning_rate": 4.53696551724138e-06, "loss": 0.2044, "step": 36875 }, { "epoch": 0.36, "grad_norm": 24.906002044677734, "learning_rate": 4.536534482758621e-06, "loss": 0.4034, "step": 36900 }, { "epoch": 0.36, "grad_norm": 4.871549606323242, "learning_rate": 4.536103448275863e-06, "loss": 0.2527, "step": 36925 }, { "epoch": 0.36, "grad_norm": 14.266575813293457, "learning_rate": 4.535672413793104e-06, "loss": 0.4058, "step": 36950 }, { "epoch": 0.36, "grad_norm": 10.549525260925293, "learning_rate": 4.535241379310345e-06, "loss": 0.2534, "step": 36975 }, { "epoch": 0.36, "grad_norm": 22.521608352661133, "learning_rate": 4.534810344827586e-06, "loss": 0.4468, "step": 37000 }, { "epoch": 0.36, "grad_norm": 7.230848789215088, "learning_rate": 4.534379310344828e-06, "loss": 0.2542, "step": 37025 }, { "epoch": 0.36, "grad_norm": 16.58330726623535, "learning_rate": 4.533948275862069e-06, "loss": 0.4456, "step": 37050 }, { "epoch": 0.36, "grad_norm": 6.639136791229248, "learning_rate": 4.533517241379311e-06, "loss": 0.2801, "step": 37075 }, { "epoch": 0.36, "grad_norm": 21.421798706054688, "learning_rate": 4.533086206896552e-06, "loss": 0.4505, "step": 37100 }, { "epoch": 0.37, "grad_norm": 11.324969291687012, "learning_rate": 4.532655172413794e-06, "loss": 0.2018, "step": 37125 }, { "epoch": 0.37, "grad_norm": 21.275545120239258, "learning_rate": 4.532224137931035e-06, "loss": 0.4364, "step": 37150 }, { "epoch": 0.37, "grad_norm": 13.707534790039062, "learning_rate": 4.5317931034482766e-06, "loss": 0.2329, "step": 37175 }, { "epoch": 0.37, "grad_norm": 16.882862091064453, "learning_rate": 4.531362068965517e-06, "loss": 0.4294, "step": 37200 }, { "epoch": 0.37, "grad_norm": 14.602853775024414, "learning_rate": 4.530931034482759e-06, "loss": 0.2211, "step": 37225 }, { "epoch": 0.37, "grad_norm": 20.426992416381836, "learning_rate": 4.5305e-06, "loss": 0.377, "step": 37250 }, { "epoch": 0.37, "grad_norm": 14.478554725646973, "learning_rate": 4.530068965517242e-06, "loss": 0.2226, "step": 37275 }, { "epoch": 0.37, "grad_norm": 20.69317626953125, "learning_rate": 4.529637931034483e-06, "loss": 0.4659, "step": 37300 }, { "epoch": 0.37, "grad_norm": 5.7381181716918945, "learning_rate": 4.5292068965517244e-06, "loss": 0.2027, "step": 37325 }, { "epoch": 0.37, "grad_norm": 20.577362060546875, "learning_rate": 4.528775862068966e-06, "loss": 0.3958, "step": 37350 }, { "epoch": 0.37, "grad_norm": 9.819319725036621, "learning_rate": 4.528344827586207e-06, "loss": 0.2621, "step": 37375 }, { "epoch": 0.37, "grad_norm": 19.053503036499023, "learning_rate": 4.527913793103449e-06, "loss": 0.4435, "step": 37400 }, { "epoch": 0.37, "grad_norm": 16.93974494934082, "learning_rate": 4.52748275862069e-06, "loss": 0.2624, "step": 37425 }, { "epoch": 0.37, "grad_norm": 19.504837036132812, "learning_rate": 4.527051724137932e-06, "loss": 0.4313, "step": 37450 }, { "epoch": 0.37, "grad_norm": 9.148703575134277, "learning_rate": 4.526620689655173e-06, "loss": 0.2608, "step": 37475 }, { "epoch": 0.37, "grad_norm": 20.598779678344727, "learning_rate": 4.526189655172415e-06, "loss": 0.4666, "step": 37500 }, { "epoch": 0.37, "grad_norm": 11.622600555419922, "learning_rate": 4.525758620689655e-06, "loss": 0.2472, "step": 37525 }, { "epoch": 0.37, "grad_norm": 21.051633834838867, "learning_rate": 4.525327586206897e-06, "loss": 0.4542, "step": 37550 }, { "epoch": 0.37, "grad_norm": 12.363874435424805, "learning_rate": 4.524896551724138e-06, "loss": 0.2243, "step": 37575 }, { "epoch": 0.37, "grad_norm": 17.017532348632812, "learning_rate": 4.52446551724138e-06, "loss": 0.3862, "step": 37600 }, { "epoch": 0.37, "grad_norm": 11.935592651367188, "learning_rate": 4.524034482758621e-06, "loss": 0.2457, "step": 37625 }, { "epoch": 0.37, "grad_norm": 20.71429443359375, "learning_rate": 4.5236034482758625e-06, "loss": 0.407, "step": 37650 }, { "epoch": 0.37, "grad_norm": 8.786859512329102, "learning_rate": 4.523172413793104e-06, "loss": 0.2179, "step": 37675 }, { "epoch": 0.37, "grad_norm": 19.980670928955078, "learning_rate": 4.5227413793103454e-06, "loss": 0.4503, "step": 37700 }, { "epoch": 0.37, "grad_norm": 11.430849075317383, "learning_rate": 4.522310344827587e-06, "loss": 0.2507, "step": 37725 }, { "epoch": 0.37, "grad_norm": 22.24041748046875, "learning_rate": 4.5218793103448275e-06, "loss": 0.4734, "step": 37750 }, { "epoch": 0.37, "grad_norm": 10.671189308166504, "learning_rate": 4.521448275862069e-06, "loss": 0.2042, "step": 37775 }, { "epoch": 0.37, "grad_norm": 19.43062973022461, "learning_rate": 4.5210172413793104e-06, "loss": 0.4862, "step": 37800 }, { "epoch": 0.37, "grad_norm": 9.466647148132324, "learning_rate": 4.520586206896552e-06, "loss": 0.2127, "step": 37825 }, { "epoch": 0.37, "grad_norm": 14.125153541564941, "learning_rate": 4.520155172413793e-06, "loss": 0.4375, "step": 37850 }, { "epoch": 0.37, "grad_norm": 10.704428672790527, "learning_rate": 4.519724137931035e-06, "loss": 0.2193, "step": 37875 }, { "epoch": 0.37, "grad_norm": 15.229955673217773, "learning_rate": 4.519293103448276e-06, "loss": 0.3881, "step": 37900 }, { "epoch": 0.37, "grad_norm": 10.884405136108398, "learning_rate": 4.518862068965518e-06, "loss": 0.2165, "step": 37925 }, { "epoch": 0.37, "grad_norm": 17.011734008789062, "learning_rate": 4.518431034482759e-06, "loss": 0.4033, "step": 37950 }, { "epoch": 0.37, "grad_norm": 9.552314758300781, "learning_rate": 4.518e-06, "loss": 0.2404, "step": 37975 }, { "epoch": 0.37, "grad_norm": 16.75690460205078, "learning_rate": 4.517568965517242e-06, "loss": 0.4565, "step": 38000 }, { "epoch": 0.37, "grad_norm": 20.041532516479492, "learning_rate": 4.5171379310344835e-06, "loss": 0.2892, "step": 38025 }, { "epoch": 0.37, "grad_norm": 18.55501937866211, "learning_rate": 4.516706896551725e-06, "loss": 0.4387, "step": 38050 }, { "epoch": 0.37, "grad_norm": 13.003132820129395, "learning_rate": 4.516275862068966e-06, "loss": 0.2444, "step": 38075 }, { "epoch": 0.37, "grad_norm": 18.198944091796875, "learning_rate": 4.515844827586207e-06, "loss": 0.3816, "step": 38100 }, { "epoch": 0.37, "grad_norm": 6.279482841491699, "learning_rate": 4.5154137931034485e-06, "loss": 0.2658, "step": 38125 }, { "epoch": 0.38, "grad_norm": 19.802949905395508, "learning_rate": 4.51498275862069e-06, "loss": 0.4235, "step": 38150 }, { "epoch": 0.38, "grad_norm": 10.810375213623047, "learning_rate": 4.5145517241379314e-06, "loss": 0.2261, "step": 38175 }, { "epoch": 0.38, "grad_norm": 18.352266311645508, "learning_rate": 4.514120689655173e-06, "loss": 0.4864, "step": 38200 }, { "epoch": 0.38, "grad_norm": 14.408841133117676, "learning_rate": 4.513689655172414e-06, "loss": 0.2053, "step": 38225 }, { "epoch": 0.38, "grad_norm": 27.976327896118164, "learning_rate": 4.513258620689656e-06, "loss": 0.3698, "step": 38250 }, { "epoch": 0.38, "grad_norm": 11.168535232543945, "learning_rate": 4.512827586206897e-06, "loss": 0.2471, "step": 38275 }, { "epoch": 0.38, "grad_norm": 8.616071701049805, "learning_rate": 4.512396551724138e-06, "loss": 0.3958, "step": 38300 }, { "epoch": 0.38, "grad_norm": 13.299789428710938, "learning_rate": 4.511965517241379e-06, "loss": 0.2551, "step": 38325 }, { "epoch": 0.38, "grad_norm": 22.91212272644043, "learning_rate": 4.511534482758621e-06, "loss": 0.4452, "step": 38350 }, { "epoch": 0.38, "grad_norm": 10.62458324432373, "learning_rate": 4.511103448275862e-06, "loss": 0.2703, "step": 38375 }, { "epoch": 0.38, "grad_norm": 11.962115287780762, "learning_rate": 4.5106896551724145e-06, "loss": 0.4108, "step": 38400 }, { "epoch": 0.38, "grad_norm": 12.380377769470215, "learning_rate": 4.510258620689655e-06, "loss": 0.2688, "step": 38425 }, { "epoch": 0.38, "grad_norm": 18.1773681640625, "learning_rate": 4.509827586206897e-06, "loss": 0.4645, "step": 38450 }, { "epoch": 0.38, "grad_norm": 13.371861457824707, "learning_rate": 4.509396551724138e-06, "loss": 0.2158, "step": 38475 }, { "epoch": 0.38, "grad_norm": 18.578086853027344, "learning_rate": 4.5089655172413795e-06, "loss": 0.4612, "step": 38500 }, { "epoch": 0.38, "grad_norm": 13.557027816772461, "learning_rate": 4.508534482758621e-06, "loss": 0.2739, "step": 38525 }, { "epoch": 0.38, "grad_norm": 16.659467697143555, "learning_rate": 4.5081034482758624e-06, "loss": 0.4574, "step": 38550 }, { "epoch": 0.38, "grad_norm": 12.510357856750488, "learning_rate": 4.507672413793104e-06, "loss": 0.2355, "step": 38575 }, { "epoch": 0.38, "grad_norm": 10.673562049865723, "learning_rate": 4.507241379310345e-06, "loss": 0.3789, "step": 38600 }, { "epoch": 0.38, "grad_norm": 13.67275619506836, "learning_rate": 4.506810344827587e-06, "loss": 0.2411, "step": 38625 }, { "epoch": 0.38, "grad_norm": 17.22715187072754, "learning_rate": 4.506379310344827e-06, "loss": 0.3869, "step": 38650 }, { "epoch": 0.38, "grad_norm": 8.876727104187012, "learning_rate": 4.50594827586207e-06, "loss": 0.2621, "step": 38675 }, { "epoch": 0.38, "grad_norm": 20.240190505981445, "learning_rate": 4.505517241379311e-06, "loss": 0.4853, "step": 38700 }, { "epoch": 0.38, "grad_norm": 10.267875671386719, "learning_rate": 4.505086206896553e-06, "loss": 0.2814, "step": 38725 }, { "epoch": 0.38, "grad_norm": 27.76569366455078, "learning_rate": 4.504655172413793e-06, "loss": 0.4253, "step": 38750 }, { "epoch": 0.38, "grad_norm": 13.806145668029785, "learning_rate": 4.504224137931035e-06, "loss": 0.2276, "step": 38775 }, { "epoch": 0.38, "grad_norm": 19.91973304748535, "learning_rate": 4.503793103448276e-06, "loss": 0.4526, "step": 38800 }, { "epoch": 0.38, "grad_norm": 10.671858787536621, "learning_rate": 4.503362068965518e-06, "loss": 0.2274, "step": 38825 }, { "epoch": 0.38, "grad_norm": 24.047483444213867, "learning_rate": 4.502931034482759e-06, "loss": 0.3998, "step": 38850 }, { "epoch": 0.38, "grad_norm": 8.392446517944336, "learning_rate": 4.5025000000000005e-06, "loss": 0.2467, "step": 38875 }, { "epoch": 0.38, "grad_norm": 8.914700508117676, "learning_rate": 4.502068965517242e-06, "loss": 0.397, "step": 38900 }, { "epoch": 0.38, "grad_norm": 10.2765531539917, "learning_rate": 4.5016379310344834e-06, "loss": 0.2285, "step": 38925 }, { "epoch": 0.38, "grad_norm": 15.841513633728027, "learning_rate": 4.501206896551725e-06, "loss": 0.395, "step": 38950 }, { "epoch": 0.38, "grad_norm": 10.308747291564941, "learning_rate": 4.5007758620689655e-06, "loss": 0.2118, "step": 38975 }, { "epoch": 0.38, "grad_norm": 23.921146392822266, "learning_rate": 4.500344827586207e-06, "loss": 0.4126, "step": 39000 }, { "epoch": 0.38, "grad_norm": 10.221600532531738, "learning_rate": 4.499913793103448e-06, "loss": 0.2377, "step": 39025 }, { "epoch": 0.38, "grad_norm": 22.330158233642578, "learning_rate": 4.49948275862069e-06, "loss": 0.395, "step": 39050 }, { "epoch": 0.38, "grad_norm": 6.340840816497803, "learning_rate": 4.499051724137931e-06, "loss": 0.2144, "step": 39075 }, { "epoch": 0.38, "grad_norm": 13.1018648147583, "learning_rate": 4.498620689655173e-06, "loss": 0.4155, "step": 39100 }, { "epoch": 0.38, "grad_norm": 10.044419288635254, "learning_rate": 4.498189655172414e-06, "loss": 0.2056, "step": 39125 }, { "epoch": 0.39, "grad_norm": 21.04882049560547, "learning_rate": 4.497758620689656e-06, "loss": 0.4575, "step": 39150 }, { "epoch": 0.39, "grad_norm": 6.194339752197266, "learning_rate": 4.497327586206897e-06, "loss": 0.2067, "step": 39175 }, { "epoch": 0.39, "grad_norm": 19.47777557373047, "learning_rate": 4.496896551724138e-06, "loss": 0.4724, "step": 39200 }, { "epoch": 0.39, "grad_norm": 10.807174682617188, "learning_rate": 4.496465517241379e-06, "loss": 0.2453, "step": 39225 }, { "epoch": 0.39, "grad_norm": 22.96137809753418, "learning_rate": 4.496034482758621e-06, "loss": 0.4987, "step": 39250 }, { "epoch": 0.39, "grad_norm": 15.68189811706543, "learning_rate": 4.495603448275863e-06, "loss": 0.2031, "step": 39275 }, { "epoch": 0.39, "grad_norm": 17.218523025512695, "learning_rate": 4.495172413793104e-06, "loss": 0.3893, "step": 39300 }, { "epoch": 0.39, "grad_norm": 9.530386924743652, "learning_rate": 4.494741379310345e-06, "loss": 0.2509, "step": 39325 }, { "epoch": 0.39, "grad_norm": 21.082138061523438, "learning_rate": 4.4943103448275865e-06, "loss": 0.4244, "step": 39350 }, { "epoch": 0.39, "grad_norm": 9.05782699584961, "learning_rate": 4.493879310344828e-06, "loss": 0.2317, "step": 39375 }, { "epoch": 0.39, "grad_norm": 20.88053321838379, "learning_rate": 4.493448275862069e-06, "loss": 0.3996, "step": 39400 }, { "epoch": 0.39, "grad_norm": 11.323485374450684, "learning_rate": 4.493017241379311e-06, "loss": 0.198, "step": 39425 }, { "epoch": 0.39, "grad_norm": 23.550010681152344, "learning_rate": 4.492586206896552e-06, "loss": 0.4075, "step": 39450 }, { "epoch": 0.39, "grad_norm": 11.702893257141113, "learning_rate": 4.492155172413794e-06, "loss": 0.238, "step": 39475 }, { "epoch": 0.39, "grad_norm": 15.820634841918945, "learning_rate": 4.491724137931035e-06, "loss": 0.4671, "step": 39500 }, { "epoch": 0.39, "grad_norm": 9.644647598266602, "learning_rate": 4.491293103448276e-06, "loss": 0.1751, "step": 39525 }, { "epoch": 0.39, "grad_norm": 10.163629531860352, "learning_rate": 4.490862068965517e-06, "loss": 0.472, "step": 39550 }, { "epoch": 0.39, "grad_norm": 6.927372932434082, "learning_rate": 4.490431034482759e-06, "loss": 0.1942, "step": 39575 }, { "epoch": 0.39, "grad_norm": 21.362579345703125, "learning_rate": 4.49e-06, "loss": 0.4531, "step": 39600 }, { "epoch": 0.39, "grad_norm": 9.926820755004883, "learning_rate": 4.489568965517242e-06, "loss": 0.2322, "step": 39625 }, { "epoch": 0.39, "grad_norm": 20.33568572998047, "learning_rate": 4.489137931034483e-06, "loss": 0.382, "step": 39650 }, { "epoch": 0.39, "grad_norm": 13.272313117980957, "learning_rate": 4.488706896551725e-06, "loss": 0.262, "step": 39675 }, { "epoch": 0.39, "grad_norm": 18.956405639648438, "learning_rate": 4.488275862068966e-06, "loss": 0.4361, "step": 39700 }, { "epoch": 0.39, "grad_norm": 9.887969970703125, "learning_rate": 4.4878448275862075e-06, "loss": 0.2117, "step": 39725 }, { "epoch": 0.39, "grad_norm": 17.843029022216797, "learning_rate": 4.487413793103448e-06, "loss": 0.4638, "step": 39750 }, { "epoch": 0.39, "grad_norm": 7.1946306228637695, "learning_rate": 4.4869827586206896e-06, "loss": 0.2274, "step": 39775 }, { "epoch": 0.39, "grad_norm": 20.08864974975586, "learning_rate": 4.486551724137931e-06, "loss": 0.3846, "step": 39800 }, { "epoch": 0.39, "grad_norm": 7.96584415435791, "learning_rate": 4.4861206896551725e-06, "loss": 0.2601, "step": 39825 }, { "epoch": 0.39, "grad_norm": 18.589576721191406, "learning_rate": 4.485689655172415e-06, "loss": 0.4547, "step": 39850 }, { "epoch": 0.39, "grad_norm": 6.656627655029297, "learning_rate": 4.485258620689655e-06, "loss": 0.2328, "step": 39875 }, { "epoch": 0.39, "grad_norm": 14.560169219970703, "learning_rate": 4.484827586206897e-06, "loss": 0.4132, "step": 39900 }, { "epoch": 0.39, "grad_norm": 8.276678085327148, "learning_rate": 4.484396551724138e-06, "loss": 0.2351, "step": 39925 }, { "epoch": 0.39, "grad_norm": 22.150606155395508, "learning_rate": 4.48396551724138e-06, "loss": 0.4352, "step": 39950 }, { "epoch": 0.39, "grad_norm": 2.228229522705078, "learning_rate": 4.483534482758621e-06, "loss": 0.2224, "step": 39975 }, { "epoch": 0.39, "grad_norm": 15.6008882522583, "learning_rate": 4.483103448275863e-06, "loss": 0.4124, "step": 40000 }, { "epoch": 0.39, "eval_loss": 0.4512104392051697, "eval_runtime": 5859.6244, "eval_samples_per_second": 1.616, "eval_steps_per_second": 0.202, "eval_wer": 0.1516931693813172, "step": 40000 }, { "epoch": 0.39, "grad_norm": 10.454863548278809, "learning_rate": 4.482672413793104e-06, "loss": 0.2455, "step": 40025 }, { "epoch": 0.39, "grad_norm": 17.782644271850586, "learning_rate": 4.482241379310346e-06, "loss": 0.4359, "step": 40050 }, { "epoch": 0.39, "grad_norm": 10.770785331726074, "learning_rate": 4.481810344827587e-06, "loss": 0.2256, "step": 40075 }, { "epoch": 0.39, "grad_norm": 17.7884578704834, "learning_rate": 4.481379310344828e-06, "loss": 0.4026, "step": 40100 }, { "epoch": 0.39, "grad_norm": 9.677326202392578, "learning_rate": 4.480948275862069e-06, "loss": 0.2291, "step": 40125 }, { "epoch": 0.39, "grad_norm": 18.52685546875, "learning_rate": 4.4805172413793106e-06, "loss": 0.3826, "step": 40150 }, { "epoch": 0.4, "grad_norm": 7.997037410736084, "learning_rate": 4.480086206896552e-06, "loss": 0.2636, "step": 40175 }, { "epoch": 0.4, "grad_norm": 13.289563179016113, "learning_rate": 4.4796551724137935e-06, "loss": 0.4545, "step": 40200 }, { "epoch": 0.4, "grad_norm": 11.063397407531738, "learning_rate": 4.479224137931035e-06, "loss": 0.2515, "step": 40225 }, { "epoch": 0.4, "grad_norm": 15.822882652282715, "learning_rate": 4.478793103448276e-06, "loss": 0.4507, "step": 40250 }, { "epoch": 0.4, "grad_norm": 7.623760223388672, "learning_rate": 4.478362068965518e-06, "loss": 0.2152, "step": 40275 }, { "epoch": 0.4, "grad_norm": 17.56698989868164, "learning_rate": 4.477931034482759e-06, "loss": 0.4649, "step": 40300 }, { "epoch": 0.4, "grad_norm": 7.548598289489746, "learning_rate": 4.4775e-06, "loss": 0.2062, "step": 40325 }, { "epoch": 0.4, "grad_norm": 20.73970603942871, "learning_rate": 4.477068965517241e-06, "loss": 0.4388, "step": 40350 }, { "epoch": 0.4, "grad_norm": 9.44389533996582, "learning_rate": 4.476637931034483e-06, "loss": 0.2621, "step": 40375 }, { "epoch": 0.4, "grad_norm": 17.0009765625, "learning_rate": 4.476206896551724e-06, "loss": 0.4068, "step": 40400 }, { "epoch": 0.4, "grad_norm": 9.482954978942871, "learning_rate": 4.475775862068966e-06, "loss": 0.2203, "step": 40425 }, { "epoch": 0.4, "grad_norm": 18.89891242980957, "learning_rate": 4.475362068965517e-06, "loss": 0.4551, "step": 40450 }, { "epoch": 0.4, "grad_norm": 14.447978973388672, "learning_rate": 4.474931034482759e-06, "loss": 0.263, "step": 40475 }, { "epoch": 0.4, "grad_norm": 20.56426429748535, "learning_rate": 4.4745e-06, "loss": 0.3689, "step": 40500 }, { "epoch": 0.4, "grad_norm": 15.64648151397705, "learning_rate": 4.474068965517242e-06, "loss": 0.2402, "step": 40525 }, { "epoch": 0.4, "grad_norm": 24.635164260864258, "learning_rate": 4.473637931034483e-06, "loss": 0.4795, "step": 40550 }, { "epoch": 0.4, "grad_norm": 10.395281791687012, "learning_rate": 4.4732068965517245e-06, "loss": 0.233, "step": 40575 }, { "epoch": 0.4, "grad_norm": 18.46332359313965, "learning_rate": 4.472775862068966e-06, "loss": 0.4326, "step": 40600 }, { "epoch": 0.4, "grad_norm": 12.077484130859375, "learning_rate": 4.472344827586207e-06, "loss": 0.2051, "step": 40625 }, { "epoch": 0.4, "grad_norm": 14.77414608001709, "learning_rate": 4.471913793103449e-06, "loss": 0.4605, "step": 40650 }, { "epoch": 0.4, "grad_norm": 6.502984046936035, "learning_rate": 4.47148275862069e-06, "loss": 0.2002, "step": 40675 }, { "epoch": 0.4, "grad_norm": 18.04239273071289, "learning_rate": 4.471051724137932e-06, "loss": 0.4068, "step": 40700 }, { "epoch": 0.4, "grad_norm": 9.772119522094727, "learning_rate": 4.470620689655173e-06, "loss": 0.2321, "step": 40725 }, { "epoch": 0.4, "grad_norm": 14.973760604858398, "learning_rate": 4.470189655172415e-06, "loss": 0.4167, "step": 40750 }, { "epoch": 0.4, "grad_norm": 18.479948043823242, "learning_rate": 4.469758620689655e-06, "loss": 0.2367, "step": 40775 }, { "epoch": 0.4, "grad_norm": 20.795642852783203, "learning_rate": 4.469327586206897e-06, "loss": 0.4629, "step": 40800 }, { "epoch": 0.4, "grad_norm": 10.979304313659668, "learning_rate": 4.468896551724138e-06, "loss": 0.2138, "step": 40825 }, { "epoch": 0.4, "grad_norm": 19.909400939941406, "learning_rate": 4.46846551724138e-06, "loss": 0.4871, "step": 40850 }, { "epoch": 0.4, "grad_norm": 9.482733726501465, "learning_rate": 4.468034482758621e-06, "loss": 0.2199, "step": 40875 }, { "epoch": 0.4, "grad_norm": 14.725374221801758, "learning_rate": 4.4676034482758626e-06, "loss": 0.4992, "step": 40900 }, { "epoch": 0.4, "grad_norm": 9.718913078308105, "learning_rate": 4.467172413793104e-06, "loss": 0.2588, "step": 40925 }, { "epoch": 0.4, "grad_norm": 22.315507888793945, "learning_rate": 4.4667413793103455e-06, "loss": 0.4863, "step": 40950 }, { "epoch": 0.4, "grad_norm": 10.784211158752441, "learning_rate": 4.466310344827586e-06, "loss": 0.2513, "step": 40975 }, { "epoch": 0.4, "grad_norm": 25.30809783935547, "learning_rate": 4.4658793103448275e-06, "loss": 0.5197, "step": 41000 }, { "epoch": 0.4, "grad_norm": 10.624234199523926, "learning_rate": 4.465448275862069e-06, "loss": 0.2367, "step": 41025 }, { "epoch": 0.4, "grad_norm": 17.696748733520508, "learning_rate": 4.4650172413793105e-06, "loss": 0.3896, "step": 41050 }, { "epoch": 0.4, "grad_norm": 12.644737243652344, "learning_rate": 4.464586206896552e-06, "loss": 0.2265, "step": 41075 }, { "epoch": 0.4, "grad_norm": 18.408470153808594, "learning_rate": 4.464155172413793e-06, "loss": 0.4282, "step": 41100 }, { "epoch": 0.4, "grad_norm": 15.279019355773926, "learning_rate": 4.463724137931035e-06, "loss": 0.2529, "step": 41125 }, { "epoch": 0.4, "grad_norm": 25.193531036376953, "learning_rate": 4.463293103448276e-06, "loss": 0.4959, "step": 41150 }, { "epoch": 0.4, "grad_norm": 12.686478614807129, "learning_rate": 4.462862068965518e-06, "loss": 0.2602, "step": 41175 }, { "epoch": 0.41, "grad_norm": 17.390636444091797, "learning_rate": 4.462431034482758e-06, "loss": 0.4937, "step": 41200 }, { "epoch": 0.41, "grad_norm": 13.429943084716797, "learning_rate": 4.462e-06, "loss": 0.227, "step": 41225 }, { "epoch": 0.41, "grad_norm": 18.474672317504883, "learning_rate": 4.461568965517242e-06, "loss": 0.4033, "step": 41250 }, { "epoch": 0.41, "grad_norm": 9.506254196166992, "learning_rate": 4.4611379310344836e-06, "loss": 0.251, "step": 41275 }, { "epoch": 0.41, "grad_norm": 20.7669620513916, "learning_rate": 4.460706896551725e-06, "loss": 0.4305, "step": 41300 }, { "epoch": 0.41, "grad_norm": 7.068553924560547, "learning_rate": 4.460275862068966e-06, "loss": 0.2241, "step": 41325 }, { "epoch": 0.41, "grad_norm": 19.646562576293945, "learning_rate": 4.459844827586207e-06, "loss": 0.4235, "step": 41350 }, { "epoch": 0.41, "grad_norm": 7.921345233917236, "learning_rate": 4.4594137931034485e-06, "loss": 0.2659, "step": 41375 }, { "epoch": 0.41, "grad_norm": 17.85953712463379, "learning_rate": 4.45898275862069e-06, "loss": 0.4545, "step": 41400 }, { "epoch": 0.41, "grad_norm": 12.014487266540527, "learning_rate": 4.4585517241379315e-06, "loss": 0.2485, "step": 41425 }, { "epoch": 0.41, "grad_norm": 15.363654136657715, "learning_rate": 4.458120689655173e-06, "loss": 0.4086, "step": 41450 }, { "epoch": 0.41, "grad_norm": 14.067072868347168, "learning_rate": 4.457689655172414e-06, "loss": 0.2573, "step": 41475 }, { "epoch": 0.41, "grad_norm": 17.460134506225586, "learning_rate": 4.457258620689656e-06, "loss": 0.3912, "step": 41500 }, { "epoch": 0.41, "grad_norm": 6.4836745262146, "learning_rate": 4.456827586206897e-06, "loss": 0.244, "step": 41525 }, { "epoch": 0.41, "grad_norm": 17.270065307617188, "learning_rate": 4.456396551724138e-06, "loss": 0.3862, "step": 41550 }, { "epoch": 0.41, "grad_norm": 11.792112350463867, "learning_rate": 4.455965517241379e-06, "loss": 0.2522, "step": 41575 }, { "epoch": 0.41, "grad_norm": 30.499788284301758, "learning_rate": 4.455534482758621e-06, "loss": 0.3889, "step": 41600 }, { "epoch": 0.41, "grad_norm": 7.305752277374268, "learning_rate": 4.455103448275862e-06, "loss": 0.2513, "step": 41625 }, { "epoch": 0.41, "grad_norm": 17.95638656616211, "learning_rate": 4.454672413793104e-06, "loss": 0.4975, "step": 41650 }, { "epoch": 0.41, "grad_norm": 8.893549919128418, "learning_rate": 4.454241379310345e-06, "loss": 0.2387, "step": 41675 }, { "epoch": 0.41, "grad_norm": 20.9178466796875, "learning_rate": 4.453810344827587e-06, "loss": 0.3697, "step": 41700 }, { "epoch": 0.41, "grad_norm": 7.932641506195068, "learning_rate": 4.453379310344828e-06, "loss": 0.2311, "step": 41725 }, { "epoch": 0.41, "grad_norm": 22.40071678161621, "learning_rate": 4.4529482758620695e-06, "loss": 0.3489, "step": 41750 }, { "epoch": 0.41, "grad_norm": 14.794843673706055, "learning_rate": 4.45251724137931e-06, "loss": 0.2202, "step": 41775 }, { "epoch": 0.41, "grad_norm": 10.32616901397705, "learning_rate": 4.452086206896552e-06, "loss": 0.3766, "step": 41800 }, { "epoch": 0.41, "grad_norm": 12.273259162902832, "learning_rate": 4.451655172413794e-06, "loss": 0.234, "step": 41825 }, { "epoch": 0.41, "grad_norm": 18.571176528930664, "learning_rate": 4.451224137931035e-06, "loss": 0.451, "step": 41850 }, { "epoch": 0.41, "grad_norm": 14.143345832824707, "learning_rate": 4.450793103448276e-06, "loss": 0.2128, "step": 41875 }, { "epoch": 0.41, "grad_norm": 13.71805477142334, "learning_rate": 4.4503620689655174e-06, "loss": 0.3955, "step": 41900 }, { "epoch": 0.41, "grad_norm": 13.646988868713379, "learning_rate": 4.449931034482759e-06, "loss": 0.2608, "step": 41925 }, { "epoch": 0.41, "grad_norm": 28.013582229614258, "learning_rate": 4.4495e-06, "loss": 0.4327, "step": 41950 }, { "epoch": 0.41, "grad_norm": 5.930446624755859, "learning_rate": 4.449068965517242e-06, "loss": 0.2009, "step": 41975 }, { "epoch": 0.41, "grad_norm": 23.710172653198242, "learning_rate": 4.448637931034483e-06, "loss": 0.441, "step": 42000 }, { "epoch": 0.41, "grad_norm": 13.632411003112793, "learning_rate": 4.448206896551725e-06, "loss": 0.2449, "step": 42025 }, { "epoch": 0.41, "grad_norm": 14.247718811035156, "learning_rate": 4.447775862068966e-06, "loss": 0.4267, "step": 42050 }, { "epoch": 0.41, "grad_norm": 15.57923698425293, "learning_rate": 4.447344827586208e-06, "loss": 0.2599, "step": 42075 }, { "epoch": 0.41, "grad_norm": 19.192323684692383, "learning_rate": 4.446913793103448e-06, "loss": 0.4412, "step": 42100 }, { "epoch": 0.41, "grad_norm": 14.393930435180664, "learning_rate": 4.44648275862069e-06, "loss": 0.223, "step": 42125 }, { "epoch": 0.41, "grad_norm": 15.00341796875, "learning_rate": 4.446051724137931e-06, "loss": 0.4325, "step": 42150 }, { "epoch": 0.41, "grad_norm": 9.92784595489502, "learning_rate": 4.445620689655173e-06, "loss": 0.2833, "step": 42175 }, { "epoch": 0.42, "grad_norm": 17.549972534179688, "learning_rate": 4.445189655172414e-06, "loss": 0.3991, "step": 42200 }, { "epoch": 0.42, "grad_norm": 7.492990970611572, "learning_rate": 4.4447586206896555e-06, "loss": 0.1849, "step": 42225 }, { "epoch": 0.42, "grad_norm": 18.558006286621094, "learning_rate": 4.444327586206897e-06, "loss": 0.4628, "step": 42250 }, { "epoch": 0.42, "grad_norm": 10.731229782104492, "learning_rate": 4.4438965517241384e-06, "loss": 0.2321, "step": 42275 }, { "epoch": 0.42, "grad_norm": 26.606395721435547, "learning_rate": 4.44346551724138e-06, "loss": 0.5023, "step": 42300 }, { "epoch": 0.42, "grad_norm": 15.915567398071289, "learning_rate": 4.4430344827586205e-06, "loss": 0.2305, "step": 42325 }, { "epoch": 0.42, "grad_norm": 13.472663879394531, "learning_rate": 4.442603448275862e-06, "loss": 0.3792, "step": 42350 }, { "epoch": 0.42, "grad_norm": 15.82785701751709, "learning_rate": 4.442172413793103e-06, "loss": 0.2402, "step": 42375 }, { "epoch": 0.42, "grad_norm": 18.823936462402344, "learning_rate": 4.441741379310346e-06, "loss": 0.3812, "step": 42400 }, { "epoch": 0.42, "grad_norm": 10.024895668029785, "learning_rate": 4.441310344827587e-06, "loss": 0.2709, "step": 42425 }, { "epoch": 0.42, "grad_norm": Infinity, "learning_rate": 4.440896551724138e-06, "loss": 0.4828, "step": 42450 }, { "epoch": 0.42, "grad_norm": 9.784287452697754, "learning_rate": 4.440465517241379e-06, "loss": 0.2121, "step": 42475 }, { "epoch": 0.42, "grad_norm": 20.42222023010254, "learning_rate": 4.4400344827586215e-06, "loss": 0.4078, "step": 42500 }, { "epoch": 0.42, "grad_norm": 8.412294387817383, "learning_rate": 4.439603448275863e-06, "loss": 0.2364, "step": 42525 }, { "epoch": 0.42, "grad_norm": 18.875242233276367, "learning_rate": 4.439172413793104e-06, "loss": 0.3824, "step": 42550 }, { "epoch": 0.42, "grad_norm": 5.844049453735352, "learning_rate": 4.438741379310345e-06, "loss": 0.2178, "step": 42575 }, { "epoch": 0.42, "grad_norm": 20.592960357666016, "learning_rate": 4.4383103448275865e-06, "loss": 0.4451, "step": 42600 }, { "epoch": 0.42, "grad_norm": 11.507908821105957, "learning_rate": 4.437879310344828e-06, "loss": 0.2389, "step": 42625 }, { "epoch": 0.42, "grad_norm": 18.6999454498291, "learning_rate": 4.4374482758620694e-06, "loss": 0.4102, "step": 42650 }, { "epoch": 0.42, "grad_norm": 11.97490119934082, "learning_rate": 4.437017241379311e-06, "loss": 0.3029, "step": 42675 }, { "epoch": 0.42, "grad_norm": 20.012828826904297, "learning_rate": 4.436586206896552e-06, "loss": 0.4549, "step": 42700 }, { "epoch": 0.42, "grad_norm": 10.251546859741211, "learning_rate": 4.436155172413794e-06, "loss": 0.239, "step": 42725 }, { "epoch": 0.42, "grad_norm": 23.401283264160156, "learning_rate": 4.435724137931035e-06, "loss": 0.3708, "step": 42750 }, { "epoch": 0.42, "grad_norm": 11.288385391235352, "learning_rate": 4.435293103448276e-06, "loss": 0.28, "step": 42775 }, { "epoch": 0.42, "grad_norm": 20.328630447387695, "learning_rate": 4.434862068965517e-06, "loss": 0.3862, "step": 42800 }, { "epoch": 0.42, "grad_norm": 7.711789608001709, "learning_rate": 4.434431034482759e-06, "loss": 0.2176, "step": 42825 }, { "epoch": 0.42, "grad_norm": 21.829233169555664, "learning_rate": 4.434e-06, "loss": 0.4318, "step": 42850 }, { "epoch": 0.42, "grad_norm": 12.761366844177246, "learning_rate": 4.433568965517242e-06, "loss": 0.1954, "step": 42875 }, { "epoch": 0.42, "grad_norm": 25.325429916381836, "learning_rate": 4.433137931034483e-06, "loss": 0.4066, "step": 42900 }, { "epoch": 0.42, "grad_norm": 11.204826354980469, "learning_rate": 4.432706896551725e-06, "loss": 0.2679, "step": 42925 }, { "epoch": 0.42, "grad_norm": 9.613166809082031, "learning_rate": 4.432275862068966e-06, "loss": 0.3762, "step": 42950 }, { "epoch": 0.42, "grad_norm": 13.409634590148926, "learning_rate": 4.4318448275862075e-06, "loss": 0.2537, "step": 42975 }, { "epoch": 0.42, "grad_norm": 21.013551712036133, "learning_rate": 4.431413793103448e-06, "loss": 0.4251, "step": 43000 }, { "epoch": 0.42, "grad_norm": 13.327977180480957, "learning_rate": 4.43098275862069e-06, "loss": 0.2995, "step": 43025 }, { "epoch": 0.42, "grad_norm": 19.268320083618164, "learning_rate": 4.430551724137931e-06, "loss": 0.4174, "step": 43050 }, { "epoch": 0.42, "grad_norm": 13.107914924621582, "learning_rate": 4.430120689655173e-06, "loss": 0.2248, "step": 43075 }, { "epoch": 0.42, "grad_norm": 12.042820930480957, "learning_rate": 4.429689655172414e-06, "loss": 0.4307, "step": 43100 }, { "epoch": 0.42, "grad_norm": 13.20609188079834, "learning_rate": 4.429258620689655e-06, "loss": 0.2339, "step": 43125 }, { "epoch": 0.42, "grad_norm": 23.074966430664062, "learning_rate": 4.428827586206897e-06, "loss": 0.4903, "step": 43150 }, { "epoch": 0.42, "grad_norm": 15.46044921875, "learning_rate": 4.428396551724138e-06, "loss": 0.244, "step": 43175 }, { "epoch": 0.42, "grad_norm": 17.643781661987305, "learning_rate": 4.42796551724138e-06, "loss": 0.4187, "step": 43200 }, { "epoch": 0.43, "grad_norm": 7.914738655090332, "learning_rate": 4.427534482758621e-06, "loss": 0.195, "step": 43225 }, { "epoch": 0.43, "grad_norm": 18.79716682434082, "learning_rate": 4.427103448275863e-06, "loss": 0.5037, "step": 43250 }, { "epoch": 0.43, "grad_norm": 13.077864646911621, "learning_rate": 4.426672413793104e-06, "loss": 0.2264, "step": 43275 }, { "epoch": 0.43, "grad_norm": 18.008544921875, "learning_rate": 4.426241379310346e-06, "loss": 0.3963, "step": 43300 }, { "epoch": 0.43, "grad_norm": 7.898217678070068, "learning_rate": 4.425810344827586e-06, "loss": 0.2593, "step": 43325 }, { "epoch": 0.43, "grad_norm": 24.652259826660156, "learning_rate": 4.425379310344828e-06, "loss": 0.3655, "step": 43350 }, { "epoch": 0.43, "grad_norm": 12.413901329040527, "learning_rate": 4.424948275862069e-06, "loss": 0.2436, "step": 43375 }, { "epoch": 0.43, "grad_norm": 18.63458824157715, "learning_rate": 4.424517241379311e-06, "loss": 0.4076, "step": 43400 }, { "epoch": 0.43, "grad_norm": 8.724047660827637, "learning_rate": 4.424086206896552e-06, "loss": 0.2375, "step": 43425 }, { "epoch": 0.43, "grad_norm": 19.71544075012207, "learning_rate": 4.4236551724137935e-06, "loss": 0.4233, "step": 43450 }, { "epoch": 0.43, "grad_norm": 9.120856285095215, "learning_rate": 4.423224137931035e-06, "loss": 0.2393, "step": 43475 }, { "epoch": 0.43, "grad_norm": 21.241676330566406, "learning_rate": 4.422793103448276e-06, "loss": 0.4006, "step": 43500 }, { "epoch": 0.43, "grad_norm": 8.276673316955566, "learning_rate": 4.422362068965518e-06, "loss": 0.2247, "step": 43525 }, { "epoch": 0.43, "grad_norm": 19.462173461914062, "learning_rate": 4.4219310344827585e-06, "loss": 0.413, "step": 43550 }, { "epoch": 0.43, "grad_norm": 12.388134956359863, "learning_rate": 4.4215e-06, "loss": 0.2621, "step": 43575 }, { "epoch": 0.43, "grad_norm": 11.436580657958984, "learning_rate": 4.421068965517241e-06, "loss": 0.4089, "step": 43600 }, { "epoch": 0.43, "grad_norm": 12.826902389526367, "learning_rate": 4.420637931034483e-06, "loss": 0.2394, "step": 43625 }, { "epoch": 0.43, "grad_norm": 21.054967880249023, "learning_rate": 4.420206896551725e-06, "loss": 0.484, "step": 43650 }, { "epoch": 0.43, "grad_norm": 6.5651702880859375, "learning_rate": 4.419775862068966e-06, "loss": 0.2307, "step": 43675 }, { "epoch": 0.43, "grad_norm": 22.525203704833984, "learning_rate": 4.419344827586207e-06, "loss": 0.4233, "step": 43700 }, { "epoch": 0.43, "grad_norm": 8.011157989501953, "learning_rate": 4.418913793103449e-06, "loss": 0.2332, "step": 43725 }, { "epoch": 0.43, "grad_norm": 14.606954574584961, "learning_rate": 4.41848275862069e-06, "loss": 0.448, "step": 43750 }, { "epoch": 0.43, "grad_norm": 15.796810150146484, "learning_rate": 4.418051724137931e-06, "loss": 0.2326, "step": 43775 }, { "epoch": 0.43, "grad_norm": 21.370969772338867, "learning_rate": 4.417620689655173e-06, "loss": 0.5072, "step": 43800 }, { "epoch": 0.43, "grad_norm": 8.778035163879395, "learning_rate": 4.4171896551724145e-06, "loss": 0.2302, "step": 43825 }, { "epoch": 0.43, "grad_norm": 16.322721481323242, "learning_rate": 4.416758620689656e-06, "loss": 0.4621, "step": 43850 }, { "epoch": 0.43, "grad_norm": 7.388380527496338, "learning_rate": 4.416327586206897e-06, "loss": 0.263, "step": 43875 }, { "epoch": 0.43, "grad_norm": 17.44451141357422, "learning_rate": 4.415896551724138e-06, "loss": 0.5145, "step": 43900 }, { "epoch": 0.43, "grad_norm": 5.993071556091309, "learning_rate": 4.4154655172413795e-06, "loss": 0.2441, "step": 43925 }, { "epoch": 0.43, "grad_norm": 19.508970260620117, "learning_rate": 4.415034482758621e-06, "loss": 0.4108, "step": 43950 }, { "epoch": 0.43, "grad_norm": 10.044815063476562, "learning_rate": 4.414603448275862e-06, "loss": 0.2543, "step": 43975 }, { "epoch": 0.43, "grad_norm": 15.14306926727295, "learning_rate": 4.414172413793104e-06, "loss": 0.3634, "step": 44000 }, { "epoch": 0.43, "grad_norm": 10.067526817321777, "learning_rate": 4.413741379310345e-06, "loss": 0.2558, "step": 44025 }, { "epoch": 0.43, "grad_norm": 21.928773880004883, "learning_rate": 4.413310344827587e-06, "loss": 0.4097, "step": 44050 }, { "epoch": 0.43, "grad_norm": 6.463046550750732, "learning_rate": 4.412879310344828e-06, "loss": 0.2428, "step": 44075 }, { "epoch": 0.43, "grad_norm": 18.2835693359375, "learning_rate": 4.41244827586207e-06, "loss": 0.431, "step": 44100 }, { "epoch": 0.43, "grad_norm": 10.172887802124023, "learning_rate": 4.41201724137931e-06, "loss": 0.2408, "step": 44125 }, { "epoch": 0.43, "grad_norm": 10.657248497009277, "learning_rate": 4.411586206896552e-06, "loss": 0.4243, "step": 44150 }, { "epoch": 0.43, "grad_norm": 8.455682754516602, "learning_rate": 4.411155172413793e-06, "loss": 0.3079, "step": 44175 }, { "epoch": 0.43, "grad_norm": 16.029508590698242, "learning_rate": 4.410724137931035e-06, "loss": 0.3542, "step": 44200 }, { "epoch": 0.43, "grad_norm": 11.67091178894043, "learning_rate": 4.410293103448276e-06, "loss": 0.2263, "step": 44225 }, { "epoch": 0.44, "grad_norm": 19.709022521972656, "learning_rate": 4.4098620689655176e-06, "loss": 0.5279, "step": 44250 }, { "epoch": 0.44, "grad_norm": 11.653274536132812, "learning_rate": 4.409431034482759e-06, "loss": 0.2103, "step": 44275 }, { "epoch": 0.44, "grad_norm": 18.30428695678711, "learning_rate": 4.4090000000000005e-06, "loss": 0.4533, "step": 44300 }, { "epoch": 0.44, "grad_norm": 9.12453556060791, "learning_rate": 4.408568965517242e-06, "loss": 0.2353, "step": 44325 }, { "epoch": 0.44, "grad_norm": 15.967944145202637, "learning_rate": 4.4081379310344825e-06, "loss": 0.422, "step": 44350 }, { "epoch": 0.44, "grad_norm": 19.4412899017334, "learning_rate": 4.407706896551725e-06, "loss": 0.2504, "step": 44375 }, { "epoch": 0.44, "grad_norm": 20.947919845581055, "learning_rate": 4.407275862068966e-06, "loss": 0.4533, "step": 44400 }, { "epoch": 0.44, "grad_norm": 13.410249710083008, "learning_rate": 4.406844827586208e-06, "loss": 0.2186, "step": 44425 }, { "epoch": 0.44, "grad_norm": 22.32141876220703, "learning_rate": 4.406413793103448e-06, "loss": 0.5187, "step": 44450 }, { "epoch": 0.44, "grad_norm": 12.900052070617676, "learning_rate": 4.40598275862069e-06, "loss": 0.19, "step": 44475 }, { "epoch": 0.44, "grad_norm": Infinity, "learning_rate": 4.405568965517242e-06, "loss": 0.415, "step": 44500 }, { "epoch": 0.44, "grad_norm": 17.2496395111084, "learning_rate": 4.405137931034484e-06, "loss": 0.2154, "step": 44525 }, { "epoch": 0.44, "grad_norm": 20.16566276550293, "learning_rate": 4.404706896551724e-06, "loss": 0.4808, "step": 44550 }, { "epoch": 0.44, "grad_norm": 9.88083553314209, "learning_rate": 4.404275862068966e-06, "loss": 0.2308, "step": 44575 }, { "epoch": 0.44, "grad_norm": 20.756975173950195, "learning_rate": 4.403844827586207e-06, "loss": 0.39, "step": 44600 }, { "epoch": 0.44, "grad_norm": 14.235981941223145, "learning_rate": 4.4034137931034486e-06, "loss": 0.2282, "step": 44625 }, { "epoch": 0.44, "grad_norm": 14.76537036895752, "learning_rate": 4.40298275862069e-06, "loss": 0.4329, "step": 44650 }, { "epoch": 0.44, "grad_norm": 12.579903602600098, "learning_rate": 4.4025517241379315e-06, "loss": 0.3055, "step": 44675 }, { "epoch": 0.44, "grad_norm": 22.297191619873047, "learning_rate": 4.402120689655173e-06, "loss": 0.5255, "step": 44700 }, { "epoch": 0.44, "grad_norm": 11.342850685119629, "learning_rate": 4.401689655172414e-06, "loss": 0.1932, "step": 44725 }, { "epoch": 0.44, "grad_norm": 15.0001802444458, "learning_rate": 4.401258620689656e-06, "loss": 0.3928, "step": 44750 }, { "epoch": 0.44, "grad_norm": 7.755105018615723, "learning_rate": 4.4008275862068965e-06, "loss": 0.2749, "step": 44775 }, { "epoch": 0.44, "grad_norm": 14.176986694335938, "learning_rate": 4.400396551724138e-06, "loss": 0.352, "step": 44800 }, { "epoch": 0.44, "grad_norm": 5.319585800170898, "learning_rate": 4.399965517241379e-06, "loss": 0.2039, "step": 44825 }, { "epoch": 0.44, "grad_norm": 22.013813018798828, "learning_rate": 4.399534482758621e-06, "loss": 0.4971, "step": 44850 }, { "epoch": 0.44, "grad_norm": 10.038384437561035, "learning_rate": 4.399103448275862e-06, "loss": 0.296, "step": 44875 }, { "epoch": 0.44, "grad_norm": 16.10417366027832, "learning_rate": 4.398672413793104e-06, "loss": 0.4231, "step": 44900 }, { "epoch": 0.44, "grad_norm": 7.553318023681641, "learning_rate": 4.398241379310345e-06, "loss": 0.2323, "step": 44925 }, { "epoch": 0.44, "grad_norm": 14.132684707641602, "learning_rate": 4.397810344827587e-06, "loss": 0.4032, "step": 44950 }, { "epoch": 0.44, "grad_norm": 6.654186725616455, "learning_rate": 4.397379310344828e-06, "loss": 0.2304, "step": 44975 }, { "epoch": 0.44, "grad_norm": 24.96910858154297, "learning_rate": 4.396948275862069e-06, "loss": 0.4606, "step": 45000 }, { "epoch": 0.44, "grad_norm": 8.683185577392578, "learning_rate": 4.39651724137931e-06, "loss": 0.2536, "step": 45025 }, { "epoch": 0.44, "grad_norm": 22.35369110107422, "learning_rate": 4.3960862068965525e-06, "loss": 0.404, "step": 45050 }, { "epoch": 0.44, "grad_norm": 10.054407119750977, "learning_rate": 4.395655172413794e-06, "loss": 0.2394, "step": 45075 }, { "epoch": 0.44, "grad_norm": 13.345102310180664, "learning_rate": 4.395224137931035e-06, "loss": 0.4929, "step": 45100 }, { "epoch": 0.44, "grad_norm": 12.864790916442871, "learning_rate": 4.394793103448276e-06, "loss": 0.2422, "step": 45125 }, { "epoch": 0.44, "grad_norm": 22.02790069580078, "learning_rate": 4.3943620689655175e-06, "loss": 0.4366, "step": 45150 }, { "epoch": 0.44, "grad_norm": 10.110830307006836, "learning_rate": 4.393931034482759e-06, "loss": 0.2434, "step": 45175 }, { "epoch": 0.44, "grad_norm": 9.194856643676758, "learning_rate": 4.3935e-06, "loss": 0.3897, "step": 45200 }, { "epoch": 0.44, "grad_norm": 6.740115642547607, "learning_rate": 4.393068965517242e-06, "loss": 0.2514, "step": 45225 }, { "epoch": 0.45, "grad_norm": 10.754118919372559, "learning_rate": 4.392637931034483e-06, "loss": 0.458, "step": 45250 }, { "epoch": 0.45, "grad_norm": 6.550917148590088, "learning_rate": 4.392206896551725e-06, "loss": 0.2461, "step": 45275 }, { "epoch": 0.45, "grad_norm": 14.059303283691406, "learning_rate": 4.391775862068966e-06, "loss": 0.4908, "step": 45300 }, { "epoch": 0.45, "grad_norm": 15.760354995727539, "learning_rate": 4.391344827586208e-06, "loss": 0.2611, "step": 45325 }, { "epoch": 0.45, "grad_norm": 21.083946228027344, "learning_rate": 4.390913793103448e-06, "loss": 0.3979, "step": 45350 }, { "epoch": 0.45, "grad_norm": 6.672922134399414, "learning_rate": 4.39048275862069e-06, "loss": 0.2586, "step": 45375 }, { "epoch": 0.45, "grad_norm": 11.384966850280762, "learning_rate": 4.390051724137931e-06, "loss": 0.3735, "step": 45400 }, { "epoch": 0.45, "grad_norm": 12.410197257995605, "learning_rate": 4.389620689655173e-06, "loss": 0.212, "step": 45425 }, { "epoch": 0.45, "grad_norm": 20.940446853637695, "learning_rate": 4.389189655172414e-06, "loss": 0.3783, "step": 45450 }, { "epoch": 0.45, "grad_norm": 5.83882474899292, "learning_rate": 4.3887586206896555e-06, "loss": 0.2198, "step": 45475 }, { "epoch": 0.45, "grad_norm": 13.143280029296875, "learning_rate": 4.388327586206897e-06, "loss": 0.4495, "step": 45500 }, { "epoch": 0.45, "grad_norm": 14.63428020477295, "learning_rate": 4.3878965517241385e-06, "loss": 0.224, "step": 45525 }, { "epoch": 0.45, "grad_norm": 21.948558807373047, "learning_rate": 4.38746551724138e-06, "loss": 0.4229, "step": 45550 }, { "epoch": 0.45, "grad_norm": 5.6346869468688965, "learning_rate": 4.3870344827586205e-06, "loss": 0.2361, "step": 45575 }, { "epoch": 0.45, "grad_norm": 14.168978691101074, "learning_rate": 4.386603448275862e-06, "loss": 0.4232, "step": 45600 }, { "epoch": 0.45, "grad_norm": 6.460154056549072, "learning_rate": 4.386172413793104e-06, "loss": 0.2482, "step": 45625 }, { "epoch": 0.45, "grad_norm": 15.491474151611328, "learning_rate": 4.385741379310346e-06, "loss": 0.3803, "step": 45650 }, { "epoch": 0.45, "grad_norm": 13.77922248840332, "learning_rate": 4.385310344827586e-06, "loss": 0.246, "step": 45675 }, { "epoch": 0.45, "grad_norm": 14.143089294433594, "learning_rate": 4.384879310344828e-06, "loss": 0.4559, "step": 45700 }, { "epoch": 0.45, "grad_norm": 9.752158164978027, "learning_rate": 4.384448275862069e-06, "loss": 0.2404, "step": 45725 }, { "epoch": 0.45, "grad_norm": 23.685626983642578, "learning_rate": 4.384017241379311e-06, "loss": 0.3666, "step": 45750 }, { "epoch": 0.45, "grad_norm": 8.23619270324707, "learning_rate": 4.383586206896552e-06, "loss": 0.2189, "step": 45775 }, { "epoch": 0.45, "grad_norm": 24.392553329467773, "learning_rate": 4.383155172413794e-06, "loss": 0.4628, "step": 45800 }, { "epoch": 0.45, "grad_norm": 9.367960929870605, "learning_rate": 4.382724137931035e-06, "loss": 0.2416, "step": 45825 }, { "epoch": 0.45, "grad_norm": 20.00285530090332, "learning_rate": 4.3822931034482765e-06, "loss": 0.4375, "step": 45850 }, { "epoch": 0.45, "grad_norm": 10.980908393859863, "learning_rate": 4.381862068965518e-06, "loss": 0.2594, "step": 45875 }, { "epoch": 0.45, "grad_norm": 11.54518985748291, "learning_rate": 4.381431034482759e-06, "loss": 0.4235, "step": 45900 }, { "epoch": 0.45, "grad_norm": 15.019547462463379, "learning_rate": 4.381e-06, "loss": 0.2527, "step": 45925 }, { "epoch": 0.45, "grad_norm": 9.03507137298584, "learning_rate": 4.3805689655172415e-06, "loss": 0.4213, "step": 45950 }, { "epoch": 0.45, "grad_norm": 10.558300971984863, "learning_rate": 4.380137931034483e-06, "loss": 0.2269, "step": 45975 }, { "epoch": 0.45, "grad_norm": 22.720951080322266, "learning_rate": 4.3797068965517244e-06, "loss": 0.3885, "step": 46000 }, { "epoch": 0.45, "grad_norm": 5.200223445892334, "learning_rate": 4.379275862068966e-06, "loss": 0.2859, "step": 46025 }, { "epoch": 0.45, "grad_norm": 16.607675552368164, "learning_rate": 4.378844827586207e-06, "loss": 0.4283, "step": 46050 }, { "epoch": 0.45, "grad_norm": 13.026808738708496, "learning_rate": 4.378413793103449e-06, "loss": 0.261, "step": 46075 }, { "epoch": 0.45, "grad_norm": 19.816408157348633, "learning_rate": 4.37798275862069e-06, "loss": 0.3626, "step": 46100 }, { "epoch": 0.45, "grad_norm": 9.833441734313965, "learning_rate": 4.377551724137931e-06, "loss": 0.2287, "step": 46125 }, { "epoch": 0.45, "grad_norm": 21.733341217041016, "learning_rate": 4.377120689655172e-06, "loss": 0.4327, "step": 46150 }, { "epoch": 0.45, "grad_norm": 8.205660820007324, "learning_rate": 4.376689655172414e-06, "loss": 0.1996, "step": 46175 }, { "epoch": 0.45, "grad_norm": 20.116615295410156, "learning_rate": 4.376258620689656e-06, "loss": 0.3978, "step": 46200 }, { "epoch": 0.45, "grad_norm": 6.793359279632568, "learning_rate": 4.3758275862068975e-06, "loss": 0.1949, "step": 46225 }, { "epoch": 0.45, "grad_norm": 13.023179054260254, "learning_rate": 4.375396551724138e-06, "loss": 0.4176, "step": 46250 }, { "epoch": 0.46, "grad_norm": 11.079754829406738, "learning_rate": 4.37496551724138e-06, "loss": 0.2674, "step": 46275 }, { "epoch": 0.46, "grad_norm": 18.10105323791504, "learning_rate": 4.374534482758621e-06, "loss": 0.4246, "step": 46300 }, { "epoch": 0.46, "grad_norm": 9.181746482849121, "learning_rate": 4.3741034482758625e-06, "loss": 0.206, "step": 46325 }, { "epoch": 0.46, "grad_norm": 10.388075828552246, "learning_rate": 4.373672413793104e-06, "loss": 0.4178, "step": 46350 }, { "epoch": 0.46, "grad_norm": 9.920413970947266, "learning_rate": 4.3732413793103454e-06, "loss": 0.3021, "step": 46375 }, { "epoch": 0.46, "grad_norm": 9.777166366577148, "learning_rate": 4.372810344827587e-06, "loss": 0.3411, "step": 46400 }, { "epoch": 0.46, "grad_norm": 6.944384574890137, "learning_rate": 4.372379310344828e-06, "loss": 0.2138, "step": 46425 }, { "epoch": 0.46, "grad_norm": 23.4280948638916, "learning_rate": 4.371948275862069e-06, "loss": 0.453, "step": 46450 }, { "epoch": 0.46, "grad_norm": 7.367005825042725, "learning_rate": 4.37151724137931e-06, "loss": 0.1961, "step": 46475 }, { "epoch": 0.46, "grad_norm": 23.943466186523438, "learning_rate": 4.371086206896552e-06, "loss": 0.5056, "step": 46500 }, { "epoch": 0.46, "grad_norm": 10.221701622009277, "learning_rate": 4.370655172413793e-06, "loss": 0.2548, "step": 46525 }, { "epoch": 0.46, "grad_norm": Infinity, "learning_rate": 4.370241379310346e-06, "loss": 0.3988, "step": 46550 }, { "epoch": 0.46, "grad_norm": 13.876520156860352, "learning_rate": 4.369810344827586e-06, "loss": 0.2212, "step": 46575 }, { "epoch": 0.46, "grad_norm": 15.520088195800781, "learning_rate": 4.369379310344828e-06, "loss": 0.433, "step": 46600 }, { "epoch": 0.46, "grad_norm": 8.933984756469727, "learning_rate": 4.368948275862069e-06, "loss": 0.2607, "step": 46625 }, { "epoch": 0.46, "grad_norm": 25.17231559753418, "learning_rate": 4.368517241379311e-06, "loss": 0.3885, "step": 46650 }, { "epoch": 0.46, "grad_norm": 10.859273910522461, "learning_rate": 4.368086206896552e-06, "loss": 0.276, "step": 46675 }, { "epoch": 0.46, "grad_norm": 18.22579574584961, "learning_rate": 4.3676551724137935e-06, "loss": 0.3589, "step": 46700 }, { "epoch": 0.46, "grad_norm": 12.959563255310059, "learning_rate": 4.367224137931035e-06, "loss": 0.1961, "step": 46725 }, { "epoch": 0.46, "grad_norm": 22.95594596862793, "learning_rate": 4.3667931034482764e-06, "loss": 0.4254, "step": 46750 }, { "epoch": 0.46, "grad_norm": 8.795223236083984, "learning_rate": 4.366362068965518e-06, "loss": 0.2422, "step": 46775 }, { "epoch": 0.46, "grad_norm": 17.274009704589844, "learning_rate": 4.3659310344827585e-06, "loss": 0.3931, "step": 46800 }, { "epoch": 0.46, "grad_norm": 6.787881851196289, "learning_rate": 4.3655e-06, "loss": 0.2061, "step": 46825 }, { "epoch": 0.46, "grad_norm": 19.447629928588867, "learning_rate": 4.365068965517241e-06, "loss": 0.385, "step": 46850 }, { "epoch": 0.46, "grad_norm": 15.549080848693848, "learning_rate": 4.364637931034484e-06, "loss": 0.295, "step": 46875 }, { "epoch": 0.46, "grad_norm": 19.589174270629883, "learning_rate": 4.364206896551724e-06, "loss": 0.4108, "step": 46900 }, { "epoch": 0.46, "grad_norm": 7.605968952178955, "learning_rate": 4.363775862068966e-06, "loss": 0.2086, "step": 46925 }, { "epoch": 0.46, "grad_norm": 18.738019943237305, "learning_rate": 4.363344827586207e-06, "loss": 0.4819, "step": 46950 }, { "epoch": 0.46, "grad_norm": 7.912668704986572, "learning_rate": 4.362913793103449e-06, "loss": 0.1834, "step": 46975 }, { "epoch": 0.46, "grad_norm": 19.838394165039062, "learning_rate": 4.36248275862069e-06, "loss": 0.4211, "step": 47000 }, { "epoch": 0.46, "grad_norm": 20.616928100585938, "learning_rate": 4.362051724137932e-06, "loss": 0.2405, "step": 47025 }, { "epoch": 0.46, "grad_norm": 22.643903732299805, "learning_rate": 4.361620689655173e-06, "loss": 0.4354, "step": 47050 }, { "epoch": 0.46, "grad_norm": 7.510342597961426, "learning_rate": 4.3611896551724145e-06, "loss": 0.2163, "step": 47075 }, { "epoch": 0.46, "grad_norm": 16.34513282775879, "learning_rate": 4.360758620689656e-06, "loss": 0.4227, "step": 47100 }, { "epoch": 0.46, "grad_norm": 7.021280288696289, "learning_rate": 4.360327586206897e-06, "loss": 0.2424, "step": 47125 }, { "epoch": 0.46, "grad_norm": 19.867719650268555, "learning_rate": 4.359896551724138e-06, "loss": 0.4474, "step": 47150 }, { "epoch": 0.46, "grad_norm": 6.292055606842041, "learning_rate": 4.3594655172413795e-06, "loss": 0.2752, "step": 47175 }, { "epoch": 0.46, "grad_norm": 19.436542510986328, "learning_rate": 4.359034482758621e-06, "loss": 0.4489, "step": 47200 }, { "epoch": 0.46, "grad_norm": 8.129922866821289, "learning_rate": 4.358603448275862e-06, "loss": 0.2301, "step": 47225 }, { "epoch": 0.46, "grad_norm": 18.10527229309082, "learning_rate": 4.358172413793104e-06, "loss": 0.3515, "step": 47250 }, { "epoch": 0.46, "grad_norm": 10.305935859680176, "learning_rate": 4.357741379310345e-06, "loss": 0.2221, "step": 47275 }, { "epoch": 0.47, "grad_norm": 19.235727310180664, "learning_rate": 4.357310344827587e-06, "loss": 0.3715, "step": 47300 }, { "epoch": 0.47, "grad_norm": 14.145712852478027, "learning_rate": 4.356879310344828e-06, "loss": 0.2389, "step": 47325 }, { "epoch": 0.47, "grad_norm": 19.11484146118164, "learning_rate": 4.356448275862069e-06, "loss": 0.3968, "step": 47350 }, { "epoch": 0.47, "grad_norm": 10.010187149047852, "learning_rate": 4.35601724137931e-06, "loss": 0.2201, "step": 47375 }, { "epoch": 0.47, "grad_norm": 16.890213012695312, "learning_rate": 4.355586206896552e-06, "loss": 0.4836, "step": 47400 }, { "epoch": 0.47, "grad_norm": 14.416849136352539, "learning_rate": 4.355155172413793e-06, "loss": 0.2463, "step": 47425 }, { "epoch": 0.47, "grad_norm": 24.56605339050293, "learning_rate": 4.354724137931035e-06, "loss": 0.4363, "step": 47450 }, { "epoch": 0.47, "grad_norm": 10.468782424926758, "learning_rate": 4.354293103448276e-06, "loss": 0.2808, "step": 47475 }, { "epoch": 0.47, "grad_norm": 8.471043586730957, "learning_rate": 4.353862068965518e-06, "loss": 0.3852, "step": 47500 }, { "epoch": 0.47, "grad_norm": 7.574191570281982, "learning_rate": 4.353431034482759e-06, "loss": 0.2189, "step": 47525 }, { "epoch": 0.47, "grad_norm": 16.49521255493164, "learning_rate": 4.3530000000000005e-06, "loss": 0.3727, "step": 47550 }, { "epoch": 0.47, "grad_norm": 11.883112907409668, "learning_rate": 4.352568965517241e-06, "loss": 0.2598, "step": 47575 }, { "epoch": 0.47, "grad_norm": 21.01721954345703, "learning_rate": 4.352137931034483e-06, "loss": 0.3847, "step": 47600 }, { "epoch": 0.47, "grad_norm": 15.597638130187988, "learning_rate": 4.351706896551725e-06, "loss": 0.1954, "step": 47625 }, { "epoch": 0.47, "grad_norm": 16.67473030090332, "learning_rate": 4.351275862068966e-06, "loss": 0.3744, "step": 47650 }, { "epoch": 0.47, "grad_norm": 12.620879173278809, "learning_rate": 4.350844827586208e-06, "loss": 0.2174, "step": 47675 }, { "epoch": 0.47, "grad_norm": 16.493608474731445, "learning_rate": 4.350413793103448e-06, "loss": 0.4912, "step": 47700 }, { "epoch": 0.47, "grad_norm": 7.280346393585205, "learning_rate": 4.34998275862069e-06, "loss": 0.2265, "step": 47725 }, { "epoch": 0.47, "grad_norm": 16.56917953491211, "learning_rate": 4.349551724137931e-06, "loss": 0.42, "step": 47750 }, { "epoch": 0.47, "grad_norm": 12.77587890625, "learning_rate": 4.349120689655173e-06, "loss": 0.2282, "step": 47775 }, { "epoch": 0.47, "grad_norm": 16.481613159179688, "learning_rate": 4.348689655172414e-06, "loss": 0.3811, "step": 47800 }, { "epoch": 0.47, "grad_norm": 9.435842514038086, "learning_rate": 4.348258620689656e-06, "loss": 0.2716, "step": 47825 }, { "epoch": 0.47, "grad_norm": 16.660324096679688, "learning_rate": 4.347827586206897e-06, "loss": 0.402, "step": 47850 }, { "epoch": 0.47, "grad_norm": 7.543620586395264, "learning_rate": 4.347396551724139e-06, "loss": 0.2266, "step": 47875 }, { "epoch": 0.47, "grad_norm": 15.495749473571777, "learning_rate": 4.34696551724138e-06, "loss": 0.3968, "step": 47900 }, { "epoch": 0.47, "grad_norm": 12.257083892822266, "learning_rate": 4.346534482758621e-06, "loss": 0.2208, "step": 47925 }, { "epoch": 0.47, "grad_norm": 17.41520881652832, "learning_rate": 4.346103448275862e-06, "loss": 0.392, "step": 47950 }, { "epoch": 0.47, "grad_norm": 8.478535652160645, "learning_rate": 4.3456724137931036e-06, "loss": 0.204, "step": 47975 }, { "epoch": 0.47, "grad_norm": 11.832317352294922, "learning_rate": 4.345241379310345e-06, "loss": 0.4443, "step": 48000 }, { "epoch": 0.47, "grad_norm": 11.115852355957031, "learning_rate": 4.3448103448275865e-06, "loss": 0.2561, "step": 48025 }, { "epoch": 0.47, "grad_norm": 17.148880004882812, "learning_rate": 4.344379310344828e-06, "loss": 0.4014, "step": 48050 }, { "epoch": 0.47, "grad_norm": 6.657584190368652, "learning_rate": 4.343948275862069e-06, "loss": 0.2374, "step": 48075 }, { "epoch": 0.47, "grad_norm": 21.45560646057129, "learning_rate": 4.343517241379311e-06, "loss": 0.404, "step": 48100 }, { "epoch": 0.47, "grad_norm": 18.40523910522461, "learning_rate": 4.3430862068965515e-06, "loss": 0.2152, "step": 48125 }, { "epoch": 0.47, "grad_norm": 20.747880935668945, "learning_rate": 4.342655172413793e-06, "loss": 0.4626, "step": 48150 }, { "epoch": 0.47, "grad_norm": 5.174261093139648, "learning_rate": 4.342224137931035e-06, "loss": 0.1986, "step": 48175 }, { "epoch": 0.47, "grad_norm": 16.768362045288086, "learning_rate": 4.341793103448277e-06, "loss": 0.4241, "step": 48200 }, { "epoch": 0.47, "grad_norm": 11.027729988098145, "learning_rate": 4.341362068965518e-06, "loss": 0.2312, "step": 48225 }, { "epoch": 0.47, "grad_norm": 11.226086616516113, "learning_rate": 4.340931034482759e-06, "loss": 0.4157, "step": 48250 }, { "epoch": 0.47, "grad_norm": 16.32867431640625, "learning_rate": 4.3405e-06, "loss": 0.187, "step": 48275 }, { "epoch": 0.48, "grad_norm": 21.07134437561035, "learning_rate": 4.340068965517242e-06, "loss": 0.4329, "step": 48300 }, { "epoch": 0.48, "grad_norm": 6.607348918914795, "learning_rate": 4.339637931034483e-06, "loss": 0.2226, "step": 48325 }, { "epoch": 0.48, "grad_norm": 18.347198486328125, "learning_rate": 4.3392068965517246e-06, "loss": 0.4001, "step": 48350 }, { "epoch": 0.48, "grad_norm": 7.6744184494018555, "learning_rate": 4.338775862068966e-06, "loss": 0.2566, "step": 48375 }, { "epoch": 0.48, "grad_norm": 15.429045677185059, "learning_rate": 4.3383448275862075e-06, "loss": 0.4597, "step": 48400 }, { "epoch": 0.48, "grad_norm": 9.835613250732422, "learning_rate": 4.337913793103449e-06, "loss": 0.2134, "step": 48425 }, { "epoch": 0.48, "grad_norm": 10.202396392822266, "learning_rate": 4.33748275862069e-06, "loss": 0.4422, "step": 48450 }, { "epoch": 0.48, "grad_norm": 6.918922424316406, "learning_rate": 4.337051724137931e-06, "loss": 0.2593, "step": 48475 }, { "epoch": 0.48, "grad_norm": 16.727270126342773, "learning_rate": 4.3366206896551725e-06, "loss": 0.3818, "step": 48500 }, { "epoch": 0.48, "grad_norm": 8.499776840209961, "learning_rate": 4.336189655172414e-06, "loss": 0.2191, "step": 48525 }, { "epoch": 0.48, "grad_norm": 16.34202766418457, "learning_rate": 4.335758620689655e-06, "loss": 0.4022, "step": 48550 }, { "epoch": 0.48, "grad_norm": 4.230836868286133, "learning_rate": 4.335327586206897e-06, "loss": 0.2205, "step": 48575 }, { "epoch": 0.48, "grad_norm": 27.164798736572266, "learning_rate": 4.334896551724138e-06, "loss": 0.418, "step": 48600 }, { "epoch": 0.48, "grad_norm": 9.744872093200684, "learning_rate": 4.33446551724138e-06, "loss": 0.2173, "step": 48625 }, { "epoch": 0.48, "grad_norm": 14.964539527893066, "learning_rate": 4.334034482758621e-06, "loss": 0.4401, "step": 48650 }, { "epoch": 0.48, "grad_norm": 7.200443267822266, "learning_rate": 4.333603448275863e-06, "loss": 0.1832, "step": 48675 }, { "epoch": 0.48, "grad_norm": 15.77879810333252, "learning_rate": 4.333172413793103e-06, "loss": 0.4133, "step": 48700 }, { "epoch": 0.48, "grad_norm": 6.245547294616699, "learning_rate": 4.332741379310345e-06, "loss": 0.2487, "step": 48725 }, { "epoch": 0.48, "grad_norm": 21.09962272644043, "learning_rate": 4.332327586206897e-06, "loss": 0.428, "step": 48750 }, { "epoch": 0.48, "grad_norm": 10.59361457824707, "learning_rate": 4.3318965517241385e-06, "loss": 0.1844, "step": 48775 }, { "epoch": 0.48, "grad_norm": 19.012834548950195, "learning_rate": 4.331465517241379e-06, "loss": 0.3603, "step": 48800 }, { "epoch": 0.48, "grad_norm": 12.835332870483398, "learning_rate": 4.3310344827586206e-06, "loss": 0.2474, "step": 48825 }, { "epoch": 0.48, "grad_norm": 18.185850143432617, "learning_rate": 4.330603448275862e-06, "loss": 0.4249, "step": 48850 }, { "epoch": 0.48, "grad_norm": 5.811707973480225, "learning_rate": 4.330172413793104e-06, "loss": 0.2057, "step": 48875 }, { "epoch": 0.48, "grad_norm": 18.12503433227539, "learning_rate": 4.329741379310346e-06, "loss": 0.4469, "step": 48900 }, { "epoch": 0.48, "grad_norm": 14.215725898742676, "learning_rate": 4.329310344827586e-06, "loss": 0.2624, "step": 48925 }, { "epoch": 0.48, "grad_norm": 18.46603775024414, "learning_rate": 4.328879310344828e-06, "loss": 0.4902, "step": 48950 }, { "epoch": 0.48, "grad_norm": 14.705146789550781, "learning_rate": 4.328448275862069e-06, "loss": 0.2725, "step": 48975 }, { "epoch": 0.48, "grad_norm": 17.902666091918945, "learning_rate": 4.328017241379311e-06, "loss": 0.3891, "step": 49000 }, { "epoch": 0.48, "grad_norm": 9.0667724609375, "learning_rate": 4.327586206896552e-06, "loss": 0.2305, "step": 49025 }, { "epoch": 0.48, "grad_norm": 14.423707008361816, "learning_rate": 4.327155172413794e-06, "loss": 0.4196, "step": 49050 }, { "epoch": 0.48, "grad_norm": 12.4293212890625, "learning_rate": 4.326724137931035e-06, "loss": 0.2382, "step": 49075 }, { "epoch": 0.48, "grad_norm": 13.119214057922363, "learning_rate": 4.3262931034482766e-06, "loss": 0.3879, "step": 49100 }, { "epoch": 0.48, "grad_norm": 8.782720565795898, "learning_rate": 4.325862068965518e-06, "loss": 0.2476, "step": 49125 }, { "epoch": 0.48, "grad_norm": 21.198917388916016, "learning_rate": 4.325431034482759e-06, "loss": 0.4146, "step": 49150 }, { "epoch": 0.48, "grad_norm": 9.642070770263672, "learning_rate": 4.325e-06, "loss": 0.2326, "step": 49175 }, { "epoch": 0.48, "grad_norm": 7.7312397956848145, "learning_rate": 4.3245689655172416e-06, "loss": 0.4024, "step": 49200 }, { "epoch": 0.48, "grad_norm": 10.048005104064941, "learning_rate": 4.324137931034483e-06, "loss": 0.2694, "step": 49225 }, { "epoch": 0.48, "grad_norm": 18.110071182250977, "learning_rate": 4.3237068965517245e-06, "loss": 0.3542, "step": 49250 }, { "epoch": 0.48, "grad_norm": 9.530168533325195, "learning_rate": 4.323275862068966e-06, "loss": 0.2046, "step": 49275 }, { "epoch": 0.48, "grad_norm": 13.741641998291016, "learning_rate": 4.322844827586207e-06, "loss": 0.4206, "step": 49300 }, { "epoch": 0.49, "grad_norm": 16.250329971313477, "learning_rate": 4.322413793103449e-06, "loss": 0.2283, "step": 49325 }, { "epoch": 0.49, "grad_norm": 22.73543357849121, "learning_rate": 4.32198275862069e-06, "loss": 0.5644, "step": 49350 }, { "epoch": 0.49, "grad_norm": 10.748140335083008, "learning_rate": 4.321551724137931e-06, "loss": 0.2454, "step": 49375 }, { "epoch": 0.49, "grad_norm": 20.591161727905273, "learning_rate": 4.321120689655172e-06, "loss": 0.4361, "step": 49400 }, { "epoch": 0.49, "grad_norm": 13.043049812316895, "learning_rate": 4.320689655172414e-06, "loss": 0.2771, "step": 49425 }, { "epoch": 0.49, "grad_norm": 25.169727325439453, "learning_rate": 4.320258620689656e-06, "loss": 0.3955, "step": 49450 }, { "epoch": 0.49, "grad_norm": 7.458559036254883, "learning_rate": 4.319827586206897e-06, "loss": 0.2342, "step": 49475 }, { "epoch": 0.49, "grad_norm": 14.633256912231445, "learning_rate": 4.319396551724138e-06, "loss": 0.4154, "step": 49500 }, { "epoch": 0.49, "grad_norm": 2.8756661415100098, "learning_rate": 4.31896551724138e-06, "loss": 0.2103, "step": 49525 }, { "epoch": 0.49, "grad_norm": 19.784677505493164, "learning_rate": 4.318534482758621e-06, "loss": 0.3761, "step": 49550 }, { "epoch": 0.49, "grad_norm": 10.418843269348145, "learning_rate": 4.3181034482758626e-06, "loss": 0.2103, "step": 49575 }, { "epoch": 0.49, "grad_norm": 18.292705535888672, "learning_rate": 4.317672413793104e-06, "loss": 0.3622, "step": 49600 }, { "epoch": 0.49, "grad_norm": 10.055800437927246, "learning_rate": 4.3172413793103455e-06, "loss": 0.2047, "step": 49625 }, { "epoch": 0.49, "grad_norm": 16.84031105041504, "learning_rate": 4.316810344827587e-06, "loss": 0.4052, "step": 49650 }, { "epoch": 0.49, "grad_norm": 8.684881210327148, "learning_rate": 4.316379310344828e-06, "loss": 0.2016, "step": 49675 }, { "epoch": 0.49, "grad_norm": 22.825271606445312, "learning_rate": 4.315948275862069e-06, "loss": 0.3954, "step": 49700 }, { "epoch": 0.49, "grad_norm": 15.99292278289795, "learning_rate": 4.3155172413793104e-06, "loss": 0.1749, "step": 49725 }, { "epoch": 0.49, "grad_norm": 20.316125869750977, "learning_rate": 4.315086206896552e-06, "loss": 0.4024, "step": 49750 }, { "epoch": 0.49, "grad_norm": 5.618777751922607, "learning_rate": 4.314655172413793e-06, "loss": 0.2435, "step": 49775 }, { "epoch": 0.49, "grad_norm": 19.138259887695312, "learning_rate": 4.314224137931035e-06, "loss": 0.3512, "step": 49800 }, { "epoch": 0.49, "grad_norm": 10.802790641784668, "learning_rate": 4.313793103448276e-06, "loss": 0.2042, "step": 49825 }, { "epoch": 0.49, "grad_norm": 16.613040924072266, "learning_rate": 4.313362068965518e-06, "loss": 0.4001, "step": 49850 }, { "epoch": 0.49, "grad_norm": 9.701685905456543, "learning_rate": 4.312931034482759e-06, "loss": 0.2213, "step": 49875 }, { "epoch": 0.49, "grad_norm": 15.799375534057617, "learning_rate": 4.312500000000001e-06, "loss": 0.3503, "step": 49900 }, { "epoch": 0.49, "grad_norm": 10.791797637939453, "learning_rate": 4.312068965517241e-06, "loss": 0.2242, "step": 49925 }, { "epoch": 0.49, "grad_norm": 10.565953254699707, "learning_rate": 4.311637931034483e-06, "loss": 0.4722, "step": 49950 }, { "epoch": 0.49, "grad_norm": 10.608716011047363, "learning_rate": 4.311206896551724e-06, "loss": 0.1996, "step": 49975 }, { "epoch": 0.49, "grad_norm": 13.829283714294434, "learning_rate": 4.310775862068966e-06, "loss": 0.3972, "step": 50000 }, { "epoch": 0.49, "grad_norm": 11.071718215942383, "learning_rate": 4.310344827586207e-06, "loss": 0.2058, "step": 50025 }, { "epoch": 0.49, "grad_norm": 20.7458553314209, "learning_rate": 4.3099137931034485e-06, "loss": 0.3857, "step": 50050 }, { "epoch": 0.49, "grad_norm": 10.530301094055176, "learning_rate": 4.30948275862069e-06, "loss": 0.2016, "step": 50075 }, { "epoch": 0.49, "grad_norm": 23.898801803588867, "learning_rate": 4.3090517241379314e-06, "loss": 0.3983, "step": 50100 }, { "epoch": 0.49, "grad_norm": 12.881850242614746, "learning_rate": 4.308620689655173e-06, "loss": 0.2344, "step": 50125 }, { "epoch": 0.49, "grad_norm": 15.799628257751465, "learning_rate": 4.308189655172414e-06, "loss": 0.4155, "step": 50150 }, { "epoch": 0.49, "grad_norm": 5.924431800842285, "learning_rate": 4.307758620689656e-06, "loss": 0.1763, "step": 50175 }, { "epoch": 0.49, "grad_norm": 14.80120849609375, "learning_rate": 4.307327586206897e-06, "loss": 0.424, "step": 50200 }, { "epoch": 0.49, "grad_norm": 9.348456382751465, "learning_rate": 4.306896551724139e-06, "loss": 0.2155, "step": 50225 }, { "epoch": 0.49, "grad_norm": 21.44217872619629, "learning_rate": 4.306465517241379e-06, "loss": 0.4662, "step": 50250 }, { "epoch": 0.49, "grad_norm": 8.10607624053955, "learning_rate": 4.306034482758621e-06, "loss": 0.219, "step": 50275 }, { "epoch": 0.49, "grad_norm": 15.524029731750488, "learning_rate": 4.305603448275862e-06, "loss": 0.4247, "step": 50300 }, { "epoch": 0.49, "grad_norm": 8.76081657409668, "learning_rate": 4.305172413793104e-06, "loss": 0.2143, "step": 50325 }, { "epoch": 0.5, "grad_norm": 13.707195281982422, "learning_rate": 4.304741379310345e-06, "loss": 0.3624, "step": 50350 }, { "epoch": 0.5, "grad_norm": 4.130409240722656, "learning_rate": 4.304310344827587e-06, "loss": 0.2187, "step": 50375 }, { "epoch": 0.5, "grad_norm": 22.106937408447266, "learning_rate": 4.303879310344828e-06, "loss": 0.383, "step": 50400 }, { "epoch": 0.5, "grad_norm": 11.982297897338867, "learning_rate": 4.3034482758620695e-06, "loss": 0.2353, "step": 50425 }, { "epoch": 0.5, "grad_norm": 19.82569122314453, "learning_rate": 4.303017241379311e-06, "loss": 0.3798, "step": 50450 }, { "epoch": 0.5, "grad_norm": 8.549561500549316, "learning_rate": 4.302586206896552e-06, "loss": 0.196, "step": 50475 }, { "epoch": 0.5, "grad_norm": 17.01948356628418, "learning_rate": 4.302155172413793e-06, "loss": 0.4154, "step": 50500 }, { "epoch": 0.5, "grad_norm": 11.658954620361328, "learning_rate": 4.3017241379310345e-06, "loss": 0.2314, "step": 50525 }, { "epoch": 0.5, "grad_norm": 19.384397506713867, "learning_rate": 4.301293103448276e-06, "loss": 0.4509, "step": 50550 }, { "epoch": 0.5, "grad_norm": 15.602794647216797, "learning_rate": 4.300862068965517e-06, "loss": 0.2379, "step": 50575 }, { "epoch": 0.5, "grad_norm": 13.8821439743042, "learning_rate": 4.300431034482759e-06, "loss": 0.4119, "step": 50600 }, { "epoch": 0.5, "grad_norm": 9.016240119934082, "learning_rate": 4.3e-06, "loss": 0.3217, "step": 50625 }, { "epoch": 0.5, "grad_norm": 19.731483459472656, "learning_rate": 4.299568965517242e-06, "loss": 0.4168, "step": 50650 }, { "epoch": 0.5, "grad_norm": 12.561285018920898, "learning_rate": 4.299137931034483e-06, "loss": 0.2464, "step": 50675 }, { "epoch": 0.5, "grad_norm": 22.59878158569336, "learning_rate": 4.298706896551724e-06, "loss": 0.3881, "step": 50700 }, { "epoch": 0.5, "grad_norm": 13.293218612670898, "learning_rate": 4.298275862068966e-06, "loss": 0.2386, "step": 50725 }, { "epoch": 0.5, "grad_norm": 17.26178550720215, "learning_rate": 4.297844827586208e-06, "loss": 0.3124, "step": 50750 }, { "epoch": 0.5, "grad_norm": 11.573773384094238, "learning_rate": 4.297413793103449e-06, "loss": 0.2334, "step": 50775 }, { "epoch": 0.5, "grad_norm": 15.567950248718262, "learning_rate": 4.2970000000000005e-06, "loss": 0.4582, "step": 50800 }, { "epoch": 0.5, "grad_norm": 13.420207023620605, "learning_rate": 4.296568965517241e-06, "loss": 0.1896, "step": 50825 }, { "epoch": 0.5, "grad_norm": 22.359210968017578, "learning_rate": 4.2961379310344834e-06, "loss": 0.4245, "step": 50850 }, { "epoch": 0.5, "grad_norm": 13.734345436096191, "learning_rate": 4.295706896551725e-06, "loss": 0.3027, "step": 50875 }, { "epoch": 0.5, "grad_norm": 25.230199813842773, "learning_rate": 4.295275862068966e-06, "loss": 0.4361, "step": 50900 }, { "epoch": 0.5, "grad_norm": 8.17070484161377, "learning_rate": 4.294844827586207e-06, "loss": 0.2315, "step": 50925 }, { "epoch": 0.5, "grad_norm": 19.59292984008789, "learning_rate": 4.2944137931034484e-06, "loss": 0.4012, "step": 50950 }, { "epoch": 0.5, "grad_norm": 7.885794162750244, "learning_rate": 4.29398275862069e-06, "loss": 0.2527, "step": 50975 }, { "epoch": 0.5, "grad_norm": 8.072739601135254, "learning_rate": 4.293551724137931e-06, "loss": 0.4152, "step": 51000 }, { "epoch": 0.5, "grad_norm": 7.666884899139404, "learning_rate": 4.293120689655173e-06, "loss": 0.2164, "step": 51025 }, { "epoch": 0.5, "grad_norm": 12.437077522277832, "learning_rate": 4.292689655172414e-06, "loss": 0.4108, "step": 51050 }, { "epoch": 0.5, "grad_norm": 6.6080780029296875, "learning_rate": 4.292258620689656e-06, "loss": 0.2187, "step": 51075 }, { "epoch": 0.5, "grad_norm": 16.104110717773438, "learning_rate": 4.291827586206897e-06, "loss": 0.4297, "step": 51100 }, { "epoch": 0.5, "grad_norm": 11.861888885498047, "learning_rate": 4.291396551724139e-06, "loss": 0.2307, "step": 51125 }, { "epoch": 0.5, "grad_norm": 14.691327095031738, "learning_rate": 4.290965517241379e-06, "loss": 0.4896, "step": 51150 }, { "epoch": 0.5, "grad_norm": 12.574609756469727, "learning_rate": 4.290534482758621e-06, "loss": 0.1995, "step": 51175 }, { "epoch": 0.5, "grad_norm": 17.95547866821289, "learning_rate": 4.290103448275862e-06, "loss": 0.4419, "step": 51200 }, { "epoch": 0.5, "grad_norm": 15.262892723083496, "learning_rate": 4.289672413793104e-06, "loss": 0.2591, "step": 51225 }, { "epoch": 0.5, "grad_norm": 14.337596893310547, "learning_rate": 4.289241379310345e-06, "loss": 0.474, "step": 51250 }, { "epoch": 0.5, "grad_norm": 11.74660873413086, "learning_rate": 4.2888103448275865e-06, "loss": 0.2553, "step": 51275 }, { "epoch": 0.5, "grad_norm": 19.719524383544922, "learning_rate": 4.288379310344828e-06, "loss": 0.346, "step": 51300 }, { "epoch": 0.5, "grad_norm": 6.0528130531311035, "learning_rate": 4.2879482758620694e-06, "loss": 0.25, "step": 51325 }, { "epoch": 0.51, "grad_norm": 17.404428482055664, "learning_rate": 4.287517241379311e-06, "loss": 0.4445, "step": 51350 }, { "epoch": 0.51, "grad_norm": 9.078980445861816, "learning_rate": 4.2870862068965515e-06, "loss": 0.237, "step": 51375 }, { "epoch": 0.51, "grad_norm": 19.431472778320312, "learning_rate": 4.286655172413793e-06, "loss": 0.3691, "step": 51400 }, { "epoch": 0.51, "grad_norm": 16.255611419677734, "learning_rate": 4.286224137931035e-06, "loss": 0.2216, "step": 51425 }, { "epoch": 0.51, "grad_norm": 20.921831130981445, "learning_rate": 4.285793103448277e-06, "loss": 0.4654, "step": 51450 }, { "epoch": 0.51, "grad_norm": 12.305278778076172, "learning_rate": 4.285362068965517e-06, "loss": 0.2351, "step": 51475 }, { "epoch": 0.51, "grad_norm": 15.059043884277344, "learning_rate": 4.284931034482759e-06, "loss": 0.426, "step": 51500 }, { "epoch": 0.51, "grad_norm": 10.608603477478027, "learning_rate": 4.2845e-06, "loss": 0.2393, "step": 51525 }, { "epoch": 0.51, "grad_norm": 18.04697036743164, "learning_rate": 4.284068965517242e-06, "loss": 0.3599, "step": 51550 }, { "epoch": 0.51, "grad_norm": 9.202003479003906, "learning_rate": 4.283637931034483e-06, "loss": 0.223, "step": 51575 }, { "epoch": 0.51, "grad_norm": 14.957218170166016, "learning_rate": 4.283206896551725e-06, "loss": 0.3203, "step": 51600 }, { "epoch": 0.51, "grad_norm": 22.117761611938477, "learning_rate": 4.282775862068966e-06, "loss": 0.2129, "step": 51625 }, { "epoch": 0.51, "grad_norm": 11.319721221923828, "learning_rate": 4.2823448275862075e-06, "loss": 0.304, "step": 51650 }, { "epoch": 0.51, "grad_norm": 8.556808471679688, "learning_rate": 4.281913793103449e-06, "loss": 0.2145, "step": 51675 }, { "epoch": 0.51, "grad_norm": 23.86703109741211, "learning_rate": 4.28148275862069e-06, "loss": 0.4994, "step": 51700 }, { "epoch": 0.51, "grad_norm": 12.2849760055542, "learning_rate": 4.281051724137931e-06, "loss": 0.2375, "step": 51725 }, { "epoch": 0.51, "grad_norm": 12.970808982849121, "learning_rate": 4.2806206896551725e-06, "loss": 0.3823, "step": 51750 }, { "epoch": 0.51, "grad_norm": 9.034952163696289, "learning_rate": 4.280189655172414e-06, "loss": 0.2941, "step": 51775 }, { "epoch": 0.51, "grad_norm": 16.76397705078125, "learning_rate": 4.279758620689655e-06, "loss": 0.406, "step": 51800 }, { "epoch": 0.51, "grad_norm": 10.400449752807617, "learning_rate": 4.279327586206897e-06, "loss": 0.2082, "step": 51825 }, { "epoch": 0.51, "grad_norm": 14.100239753723145, "learning_rate": 4.278896551724138e-06, "loss": 0.4101, "step": 51850 }, { "epoch": 0.51, "grad_norm": 10.886175155639648, "learning_rate": 4.27846551724138e-06, "loss": 0.1952, "step": 51875 }, { "epoch": 0.51, "grad_norm": 19.118024826049805, "learning_rate": 4.278034482758621e-06, "loss": 0.3673, "step": 51900 }, { "epoch": 0.51, "grad_norm": 6.661749362945557, "learning_rate": 4.277603448275862e-06, "loss": 0.2215, "step": 51925 }, { "epoch": 0.51, "grad_norm": 14.853191375732422, "learning_rate": 4.277172413793103e-06, "loss": 0.4265, "step": 51950 }, { "epoch": 0.51, "grad_norm": 13.040432929992676, "learning_rate": 4.276741379310345e-06, "loss": 0.2252, "step": 51975 }, { "epoch": 0.51, "grad_norm": 23.210899353027344, "learning_rate": 4.276310344827587e-06, "loss": 0.3736, "step": 52000 }, { "epoch": 0.51, "grad_norm": 13.76315975189209, "learning_rate": 4.2758793103448285e-06, "loss": 0.2252, "step": 52025 }, { "epoch": 0.51, "grad_norm": 13.715047836303711, "learning_rate": 4.275448275862069e-06, "loss": 0.4232, "step": 52050 }, { "epoch": 0.51, "grad_norm": 15.422096252441406, "learning_rate": 4.275017241379311e-06, "loss": 0.1951, "step": 52075 }, { "epoch": 0.51, "grad_norm": 19.967681884765625, "learning_rate": 4.274586206896552e-06, "loss": 0.4343, "step": 52100 }, { "epoch": 0.51, "grad_norm": 5.380831241607666, "learning_rate": 4.2741551724137935e-06, "loss": 0.211, "step": 52125 }, { "epoch": 0.51, "grad_norm": 20.367374420166016, "learning_rate": 4.273724137931035e-06, "loss": 0.4275, "step": 52150 }, { "epoch": 0.51, "grad_norm": 5.39565372467041, "learning_rate": 4.273293103448276e-06, "loss": 0.2534, "step": 52175 }, { "epoch": 0.51, "grad_norm": 15.7379732131958, "learning_rate": 4.272862068965518e-06, "loss": 0.4496, "step": 52200 }, { "epoch": 0.51, "grad_norm": 10.406715393066406, "learning_rate": 4.272431034482759e-06, "loss": 0.2455, "step": 52225 }, { "epoch": 0.51, "grad_norm": 19.979488372802734, "learning_rate": 4.272000000000001e-06, "loss": 0.4131, "step": 52250 }, { "epoch": 0.51, "grad_norm": 7.570291042327881, "learning_rate": 4.271568965517241e-06, "loss": 0.2237, "step": 52275 }, { "epoch": 0.51, "grad_norm": 12.896323204040527, "learning_rate": 4.271137931034483e-06, "loss": 0.3853, "step": 52300 }, { "epoch": 0.51, "grad_norm": 9.033882141113281, "learning_rate": 4.270706896551724e-06, "loss": 0.2594, "step": 52325 }, { "epoch": 0.51, "grad_norm": 17.618053436279297, "learning_rate": 4.270275862068966e-06, "loss": 0.3868, "step": 52350 }, { "epoch": 0.52, "grad_norm": 8.348601341247559, "learning_rate": 4.269844827586207e-06, "loss": 0.1859, "step": 52375 }, { "epoch": 0.52, "grad_norm": 7.395592212677002, "learning_rate": 4.269413793103449e-06, "loss": 0.3752, "step": 52400 }, { "epoch": 0.52, "grad_norm": 27.62714958190918, "learning_rate": 4.26898275862069e-06, "loss": 0.2001, "step": 52425 }, { "epoch": 0.52, "grad_norm": 20.505685806274414, "learning_rate": 4.268551724137932e-06, "loss": 0.4464, "step": 52450 }, { "epoch": 0.52, "grad_norm": 15.744157791137695, "learning_rate": 4.268120689655173e-06, "loss": 0.2514, "step": 52475 }, { "epoch": 0.52, "grad_norm": 17.820907592773438, "learning_rate": 4.267689655172414e-06, "loss": 0.4089, "step": 52500 }, { "epoch": 0.52, "grad_norm": 11.057650566101074, "learning_rate": 4.267258620689655e-06, "loss": 0.2386, "step": 52525 }, { "epoch": 0.52, "grad_norm": 27.398345947265625, "learning_rate": 4.2668275862068966e-06, "loss": 0.4258, "step": 52550 }, { "epoch": 0.52, "grad_norm": 0.6027886867523193, "learning_rate": 4.266396551724139e-06, "loss": 0.1926, "step": 52575 }, { "epoch": 0.52, "grad_norm": 14.293137550354004, "learning_rate": 4.2659655172413795e-06, "loss": 0.4111, "step": 52600 }, { "epoch": 0.52, "grad_norm": 13.17277717590332, "learning_rate": 4.265534482758621e-06, "loss": 0.2062, "step": 52625 }, { "epoch": 0.52, "grad_norm": 13.5430269241333, "learning_rate": 4.265103448275862e-06, "loss": 0.4144, "step": 52650 }, { "epoch": 0.52, "grad_norm": 8.029256820678711, "learning_rate": 4.264672413793104e-06, "loss": 0.2415, "step": 52675 }, { "epoch": 0.52, "grad_norm": 20.8430118560791, "learning_rate": 4.264241379310345e-06, "loss": 0.3564, "step": 52700 }, { "epoch": 0.52, "grad_norm": 7.32933235168457, "learning_rate": 4.263810344827587e-06, "loss": 0.2313, "step": 52725 }, { "epoch": 0.52, "grad_norm": 23.26517677307129, "learning_rate": 4.263379310344828e-06, "loss": 0.43, "step": 52750 }, { "epoch": 0.52, "grad_norm": 8.208526611328125, "learning_rate": 4.26294827586207e-06, "loss": 0.2187, "step": 52775 }, { "epoch": 0.52, "grad_norm": 21.389020919799805, "learning_rate": 4.262517241379311e-06, "loss": 0.4447, "step": 52800 }, { "epoch": 0.52, "grad_norm": 9.141560554504395, "learning_rate": 4.262086206896552e-06, "loss": 0.2719, "step": 52825 }, { "epoch": 0.52, "grad_norm": 25.077707290649414, "learning_rate": 4.261655172413793e-06, "loss": 0.4229, "step": 52850 }, { "epoch": 0.52, "grad_norm": 15.806678771972656, "learning_rate": 4.261224137931035e-06, "loss": 0.2087, "step": 52875 }, { "epoch": 0.52, "grad_norm": Infinity, "learning_rate": 4.260810344827587e-06, "loss": 0.4117, "step": 52900 }, { "epoch": 0.52, "grad_norm": 4.867247104644775, "learning_rate": 4.260379310344828e-06, "loss": 0.2083, "step": 52925 }, { "epoch": 0.52, "grad_norm": 20.888530731201172, "learning_rate": 4.259948275862069e-06, "loss": 0.4576, "step": 52950 }, { "epoch": 0.52, "grad_norm": 8.00349235534668, "learning_rate": 4.2595172413793105e-06, "loss": 0.1833, "step": 52975 }, { "epoch": 0.52, "grad_norm": 14.539368629455566, "learning_rate": 4.259086206896552e-06, "loss": 0.4572, "step": 53000 }, { "epoch": 0.52, "grad_norm": 17.71281623840332, "learning_rate": 4.258655172413793e-06, "loss": 0.2667, "step": 53025 }, { "epoch": 0.52, "grad_norm": 21.047958374023438, "learning_rate": 4.258224137931035e-06, "loss": 0.4052, "step": 53050 }, { "epoch": 0.52, "grad_norm": 13.142719268798828, "learning_rate": 4.257793103448276e-06, "loss": 0.2617, "step": 53075 }, { "epoch": 0.52, "grad_norm": 17.07082176208496, "learning_rate": 4.257362068965518e-06, "loss": 0.4484, "step": 53100 }, { "epoch": 0.52, "grad_norm": 9.199559211730957, "learning_rate": 4.256931034482759e-06, "loss": 0.2152, "step": 53125 }, { "epoch": 0.52, "grad_norm": 13.733331680297852, "learning_rate": 4.2565e-06, "loss": 0.4162, "step": 53150 }, { "epoch": 0.52, "grad_norm": 8.87488842010498, "learning_rate": 4.256068965517241e-06, "loss": 0.2146, "step": 53175 }, { "epoch": 0.52, "grad_norm": 9.059826850891113, "learning_rate": 4.255637931034483e-06, "loss": 0.3614, "step": 53200 }, { "epoch": 0.52, "grad_norm": 12.830284118652344, "learning_rate": 4.255206896551724e-06, "loss": 0.2254, "step": 53225 }, { "epoch": 0.52, "grad_norm": 18.859289169311523, "learning_rate": 4.2547758620689665e-06, "loss": 0.4301, "step": 53250 }, { "epoch": 0.52, "grad_norm": 11.888760566711426, "learning_rate": 4.254344827586207e-06, "loss": 0.1999, "step": 53275 }, { "epoch": 0.52, "grad_norm": 15.60858154296875, "learning_rate": 4.2539137931034486e-06, "loss": 0.4515, "step": 53300 }, { "epoch": 0.52, "grad_norm": 8.291271209716797, "learning_rate": 4.25348275862069e-06, "loss": 0.2477, "step": 53325 }, { "epoch": 0.52, "grad_norm": 11.090579986572266, "learning_rate": 4.2530517241379315e-06, "loss": 0.3793, "step": 53350 }, { "epoch": 0.52, "grad_norm": 3.256408214569092, "learning_rate": 4.252620689655172e-06, "loss": 0.2214, "step": 53375 }, { "epoch": 0.53, "grad_norm": 17.04633331298828, "learning_rate": 4.252189655172414e-06, "loss": 0.4189, "step": 53400 }, { "epoch": 0.53, "grad_norm": 13.847761154174805, "learning_rate": 4.251758620689656e-06, "loss": 0.2524, "step": 53425 }, { "epoch": 0.53, "grad_norm": 16.269437789916992, "learning_rate": 4.251327586206897e-06, "loss": 0.4332, "step": 53450 }, { "epoch": 0.53, "grad_norm": 5.274104595184326, "learning_rate": 4.250896551724139e-06, "loss": 0.2276, "step": 53475 }, { "epoch": 0.53, "grad_norm": 9.720086097717285, "learning_rate": 4.250465517241379e-06, "loss": 0.3705, "step": 53500 }, { "epoch": 0.53, "grad_norm": 8.652731895446777, "learning_rate": 4.250034482758621e-06, "loss": 0.1878, "step": 53525 }, { "epoch": 0.53, "grad_norm": 14.93252182006836, "learning_rate": 4.249603448275862e-06, "loss": 0.3845, "step": 53550 }, { "epoch": 0.53, "grad_norm": 10.494071006774902, "learning_rate": 4.249172413793104e-06, "loss": 0.208, "step": 53575 }, { "epoch": 0.53, "grad_norm": 19.36760711669922, "learning_rate": 4.248741379310345e-06, "loss": 0.3793, "step": 53600 }, { "epoch": 0.53, "grad_norm": 10.296930313110352, "learning_rate": 4.248310344827587e-06, "loss": 0.1822, "step": 53625 }, { "epoch": 0.53, "grad_norm": 16.077983856201172, "learning_rate": 4.247879310344828e-06, "loss": 0.4367, "step": 53650 }, { "epoch": 0.53, "grad_norm": 9.533199310302734, "learning_rate": 4.2474482758620696e-06, "loss": 0.2106, "step": 53675 }, { "epoch": 0.53, "grad_norm": 10.494033813476562, "learning_rate": 4.247017241379311e-06, "loss": 0.4139, "step": 53700 }, { "epoch": 0.53, "grad_norm": 13.293660163879395, "learning_rate": 4.246586206896552e-06, "loss": 0.197, "step": 53725 }, { "epoch": 0.53, "grad_norm": 20.925861358642578, "learning_rate": 4.246155172413793e-06, "loss": 0.4366, "step": 53750 }, { "epoch": 0.53, "grad_norm": 6.360239028930664, "learning_rate": 4.2457241379310345e-06, "loss": 0.274, "step": 53775 }, { "epoch": 0.53, "grad_norm": 23.617061614990234, "learning_rate": 4.245293103448276e-06, "loss": 0.4082, "step": 53800 }, { "epoch": 0.53, "grad_norm": 5.133880138397217, "learning_rate": 4.2448620689655174e-06, "loss": 0.2471, "step": 53825 }, { "epoch": 0.53, "grad_norm": 11.283665657043457, "learning_rate": 4.244431034482759e-06, "loss": 0.4036, "step": 53850 }, { "epoch": 0.53, "grad_norm": 10.235103607177734, "learning_rate": 4.244e-06, "loss": 0.2083, "step": 53875 }, { "epoch": 0.53, "grad_norm": 20.92281723022461, "learning_rate": 4.243568965517242e-06, "loss": 0.3915, "step": 53900 }, { "epoch": 0.53, "grad_norm": 7.110289096832275, "learning_rate": 4.243137931034483e-06, "loss": 0.2301, "step": 53925 }, { "epoch": 0.53, "grad_norm": 20.354114532470703, "learning_rate": 4.242706896551724e-06, "loss": 0.3712, "step": 53950 }, { "epoch": 0.53, "grad_norm": 9.460657119750977, "learning_rate": 4.242275862068966e-06, "loss": 0.2462, "step": 53975 }, { "epoch": 0.53, "grad_norm": 17.71807861328125, "learning_rate": 4.241844827586208e-06, "loss": 0.3357, "step": 54000 }, { "epoch": 0.53, "grad_norm": 6.1849775314331055, "learning_rate": 4.241413793103449e-06, "loss": 0.2536, "step": 54025 }, { "epoch": 0.53, "grad_norm": 16.319345474243164, "learning_rate": 4.24098275862069e-06, "loss": 0.4414, "step": 54050 }, { "epoch": 0.53, "grad_norm": 14.8930082321167, "learning_rate": 4.240551724137931e-06, "loss": 0.2141, "step": 54075 }, { "epoch": 0.53, "grad_norm": 17.475248336791992, "learning_rate": 4.240120689655173e-06, "loss": 0.4046, "step": 54100 }, { "epoch": 0.53, "grad_norm": 13.270785331726074, "learning_rate": 4.239689655172414e-06, "loss": 0.1871, "step": 54125 }, { "epoch": 0.53, "grad_norm": 18.092531204223633, "learning_rate": 4.2392586206896555e-06, "loss": 0.3951, "step": 54150 }, { "epoch": 0.53, "grad_norm": 5.132674217224121, "learning_rate": 4.238827586206897e-06, "loss": 0.2355, "step": 54175 }, { "epoch": 0.53, "grad_norm": 18.565967559814453, "learning_rate": 4.2383965517241384e-06, "loss": 0.3859, "step": 54200 }, { "epoch": 0.53, "grad_norm": 8.803686141967773, "learning_rate": 4.23796551724138e-06, "loss": 0.26, "step": 54225 }, { "epoch": 0.53, "grad_norm": 20.16525650024414, "learning_rate": 4.237534482758621e-06, "loss": 0.4098, "step": 54250 }, { "epoch": 0.53, "grad_norm": 5.771519184112549, "learning_rate": 4.237103448275862e-06, "loss": 0.2174, "step": 54275 }, { "epoch": 0.53, "grad_norm": 16.315271377563477, "learning_rate": 4.2366724137931034e-06, "loss": 0.4048, "step": 54300 }, { "epoch": 0.53, "grad_norm": 7.157895565032959, "learning_rate": 4.236241379310345e-06, "loss": 0.1967, "step": 54325 }, { "epoch": 0.53, "grad_norm": 18.368825912475586, "learning_rate": 4.235810344827586e-06, "loss": 0.463, "step": 54350 }, { "epoch": 0.53, "grad_norm": 9.02646255493164, "learning_rate": 4.235379310344828e-06, "loss": 0.232, "step": 54375 }, { "epoch": 0.54, "grad_norm": 20.61746597290039, "learning_rate": 4.234948275862069e-06, "loss": 0.4529, "step": 54400 }, { "epoch": 0.54, "grad_norm": 1.407729148864746, "learning_rate": 4.234517241379311e-06, "loss": 0.2705, "step": 54425 }, { "epoch": 0.54, "grad_norm": 20.754545211791992, "learning_rate": 4.234086206896552e-06, "loss": 0.4169, "step": 54450 }, { "epoch": 0.54, "grad_norm": 7.8885931968688965, "learning_rate": 4.233655172413794e-06, "loss": 0.2087, "step": 54475 }, { "epoch": 0.54, "grad_norm": 15.829153060913086, "learning_rate": 4.233224137931034e-06, "loss": 0.4471, "step": 54500 }, { "epoch": 0.54, "grad_norm": 13.023852348327637, "learning_rate": 4.232793103448276e-06, "loss": 0.2512, "step": 54525 }, { "epoch": 0.54, "grad_norm": 9.12546443939209, "learning_rate": 4.232362068965518e-06, "loss": 0.3404, "step": 54550 }, { "epoch": 0.54, "grad_norm": 12.07490062713623, "learning_rate": 4.2319310344827594e-06, "loss": 0.2533, "step": 54575 }, { "epoch": 0.54, "grad_norm": 22.420682907104492, "learning_rate": 4.231500000000001e-06, "loss": 0.3779, "step": 54600 }, { "epoch": 0.54, "grad_norm": 6.908457279205322, "learning_rate": 4.2310689655172415e-06, "loss": 0.1765, "step": 54625 }, { "epoch": 0.54, "grad_norm": 35.679420471191406, "learning_rate": 4.230637931034483e-06, "loss": 0.39, "step": 54650 }, { "epoch": 0.54, "grad_norm": 7.08853816986084, "learning_rate": 4.2302068965517244e-06, "loss": 0.2678, "step": 54675 }, { "epoch": 0.54, "grad_norm": 8.617565155029297, "learning_rate": 4.229775862068966e-06, "loss": 0.3265, "step": 54700 }, { "epoch": 0.54, "grad_norm": 10.52998161315918, "learning_rate": 4.229344827586207e-06, "loss": 0.2104, "step": 54725 }, { "epoch": 0.54, "grad_norm": 17.571020126342773, "learning_rate": 4.228913793103449e-06, "loss": 0.3607, "step": 54750 }, { "epoch": 0.54, "grad_norm": 13.236793518066406, "learning_rate": 4.22848275862069e-06, "loss": 0.2497, "step": 54775 }, { "epoch": 0.54, "grad_norm": 23.87506103515625, "learning_rate": 4.228051724137932e-06, "loss": 0.3586, "step": 54800 }, { "epoch": 0.54, "grad_norm": 10.187002182006836, "learning_rate": 4.227620689655173e-06, "loss": 0.1923, "step": 54825 }, { "epoch": 0.54, "grad_norm": 16.872020721435547, "learning_rate": 4.227189655172414e-06, "loss": 0.3615, "step": 54850 }, { "epoch": 0.54, "grad_norm": 5.938270092010498, "learning_rate": 4.226758620689655e-06, "loss": 0.2294, "step": 54875 }, { "epoch": 0.54, "grad_norm": 20.046602249145508, "learning_rate": 4.226327586206897e-06, "loss": 0.4328, "step": 54900 }, { "epoch": 0.54, "grad_norm": 8.902716636657715, "learning_rate": 4.225896551724138e-06, "loss": 0.2394, "step": 54925 }, { "epoch": 0.54, "grad_norm": 13.870621681213379, "learning_rate": 4.22548275862069e-06, "loss": 0.4718, "step": 54950 }, { "epoch": 0.54, "grad_norm": 6.211285591125488, "learning_rate": 4.225051724137931e-06, "loss": 0.2604, "step": 54975 }, { "epoch": 0.54, "grad_norm": 12.991025924682617, "learning_rate": 4.2246206896551725e-06, "loss": 0.3979, "step": 55000 }, { "epoch": 0.54, "grad_norm": 12.195232391357422, "learning_rate": 4.224189655172414e-06, "loss": 0.1935, "step": 55025 }, { "epoch": 0.54, "grad_norm": 13.252554893493652, "learning_rate": 4.2237586206896554e-06, "loss": 0.3654, "step": 55050 }, { "epoch": 0.54, "grad_norm": 9.64731502532959, "learning_rate": 4.223327586206897e-06, "loss": 0.2142, "step": 55075 }, { "epoch": 0.54, "grad_norm": 25.894458770751953, "learning_rate": 4.222896551724138e-06, "loss": 0.4018, "step": 55100 }, { "epoch": 0.54, "grad_norm": 8.503280639648438, "learning_rate": 4.22246551724138e-06, "loss": 0.262, "step": 55125 }, { "epoch": 0.54, "grad_norm": 20.089921951293945, "learning_rate": 4.222034482758621e-06, "loss": 0.3681, "step": 55150 }, { "epoch": 0.54, "grad_norm": 7.998415946960449, "learning_rate": 4.221603448275862e-06, "loss": 0.2136, "step": 55175 }, { "epoch": 0.54, "grad_norm": 19.97775650024414, "learning_rate": 4.221172413793103e-06, "loss": 0.4077, "step": 55200 }, { "epoch": 0.54, "grad_norm": 9.995193481445312, "learning_rate": 4.220741379310346e-06, "loss": 0.2039, "step": 55225 }, { "epoch": 0.54, "grad_norm": 13.987615585327148, "learning_rate": 4.220310344827587e-06, "loss": 0.3657, "step": 55250 }, { "epoch": 0.54, "grad_norm": 9.512188911437988, "learning_rate": 4.219879310344828e-06, "loss": 0.2473, "step": 55275 }, { "epoch": 0.54, "grad_norm": 23.889562606811523, "learning_rate": 4.219448275862069e-06, "loss": 0.4358, "step": 55300 }, { "epoch": 0.54, "grad_norm": 7.579962253570557, "learning_rate": 4.219017241379311e-06, "loss": 0.2377, "step": 55325 }, { "epoch": 0.54, "grad_norm": 19.492128372192383, "learning_rate": 4.218586206896552e-06, "loss": 0.3986, "step": 55350 }, { "epoch": 0.54, "grad_norm": 9.139385223388672, "learning_rate": 4.2181551724137935e-06, "loss": 0.1866, "step": 55375 }, { "epoch": 0.54, "grad_norm": 26.607385635375977, "learning_rate": 4.217724137931035e-06, "loss": 0.3771, "step": 55400 }, { "epoch": 0.55, "grad_norm": 13.814603805541992, "learning_rate": 4.2172931034482764e-06, "loss": 0.2847, "step": 55425 }, { "epoch": 0.55, "grad_norm": 20.5399169921875, "learning_rate": 4.216862068965518e-06, "loss": 0.4744, "step": 55450 }, { "epoch": 0.55, "grad_norm": 11.80937385559082, "learning_rate": 4.216431034482759e-06, "loss": 0.2283, "step": 55475 }, { "epoch": 0.55, "grad_norm": 15.348235130310059, "learning_rate": 4.216e-06, "loss": 0.3953, "step": 55500 }, { "epoch": 0.55, "grad_norm": 4.528231620788574, "learning_rate": 4.215568965517241e-06, "loss": 0.2255, "step": 55525 }, { "epoch": 0.55, "grad_norm": 5.945939540863037, "learning_rate": 4.215137931034483e-06, "loss": 0.3429, "step": 55550 }, { "epoch": 0.55, "grad_norm": 5.984127521514893, "learning_rate": 4.214706896551724e-06, "loss": 0.2283, "step": 55575 }, { "epoch": 0.55, "grad_norm": 18.7340030670166, "learning_rate": 4.214275862068966e-06, "loss": 0.391, "step": 55600 }, { "epoch": 0.55, "grad_norm": 22.708675384521484, "learning_rate": 4.213844827586207e-06, "loss": 0.2286, "step": 55625 }, { "epoch": 0.55, "grad_norm": 22.479825973510742, "learning_rate": 4.213413793103449e-06, "loss": 0.3805, "step": 55650 }, { "epoch": 0.55, "grad_norm": 6.12938928604126, "learning_rate": 4.21298275862069e-06, "loss": 0.258, "step": 55675 }, { "epoch": 0.55, "grad_norm": 19.53078842163086, "learning_rate": 4.212551724137932e-06, "loss": 0.4209, "step": 55700 }, { "epoch": 0.55, "grad_norm": 14.989806175231934, "learning_rate": 4.212120689655172e-06, "loss": 0.2362, "step": 55725 }, { "epoch": 0.55, "grad_norm": 17.672222137451172, "learning_rate": 4.211689655172414e-06, "loss": 0.38, "step": 55750 }, { "epoch": 0.55, "grad_norm": 10.574797630310059, "learning_rate": 4.211258620689655e-06, "loss": 0.214, "step": 55775 }, { "epoch": 0.55, "grad_norm": 23.55452537536621, "learning_rate": 4.2108275862068974e-06, "loss": 0.406, "step": 55800 }, { "epoch": 0.55, "grad_norm": 8.759215354919434, "learning_rate": 4.210396551724139e-06, "loss": 0.2159, "step": 55825 }, { "epoch": 0.55, "grad_norm": 13.040299415588379, "learning_rate": 4.2099655172413795e-06, "loss": 0.4167, "step": 55850 }, { "epoch": 0.55, "grad_norm": 9.842036247253418, "learning_rate": 4.209534482758621e-06, "loss": 0.2245, "step": 55875 }, { "epoch": 0.55, "grad_norm": 12.113204956054688, "learning_rate": 4.209103448275862e-06, "loss": 0.408, "step": 55900 }, { "epoch": 0.55, "grad_norm": 12.25839614868164, "learning_rate": 4.208672413793104e-06, "loss": 0.2354, "step": 55925 }, { "epoch": 0.55, "grad_norm": 17.540388107299805, "learning_rate": 4.208241379310345e-06, "loss": 0.3726, "step": 55950 }, { "epoch": 0.55, "grad_norm": 7.497552394866943, "learning_rate": 4.207810344827587e-06, "loss": 0.2445, "step": 55975 }, { "epoch": 0.55, "grad_norm": 17.90994644165039, "learning_rate": 4.207379310344828e-06, "loss": 0.4169, "step": 56000 }, { "epoch": 0.55, "grad_norm": 8.063149452209473, "learning_rate": 4.20694827586207e-06, "loss": 0.2033, "step": 56025 }, { "epoch": 0.55, "grad_norm": 20.358930587768555, "learning_rate": 4.206517241379311e-06, "loss": 0.4242, "step": 56050 }, { "epoch": 0.55, "grad_norm": 12.259398460388184, "learning_rate": 4.206086206896552e-06, "loss": 0.2346, "step": 56075 }, { "epoch": 0.55, "grad_norm": 18.762624740600586, "learning_rate": 4.205655172413793e-06, "loss": 0.36, "step": 56100 }, { "epoch": 0.55, "grad_norm": 7.800327301025391, "learning_rate": 4.205224137931035e-06, "loss": 0.2239, "step": 56125 }, { "epoch": 0.55, "grad_norm": 25.21324920654297, "learning_rate": 4.204793103448276e-06, "loss": 0.4235, "step": 56150 }, { "epoch": 0.55, "grad_norm": 6.12460470199585, "learning_rate": 4.204362068965518e-06, "loss": 0.1912, "step": 56175 }, { "epoch": 0.55, "grad_norm": 11.161556243896484, "learning_rate": 4.203931034482759e-06, "loss": 0.357, "step": 56200 }, { "epoch": 0.55, "grad_norm": 10.392638206481934, "learning_rate": 4.2035000000000005e-06, "loss": 0.2088, "step": 56225 }, { "epoch": 0.55, "grad_norm": 23.398195266723633, "learning_rate": 4.203068965517242e-06, "loss": 0.4172, "step": 56250 }, { "epoch": 0.55, "grad_norm": 11.793143272399902, "learning_rate": 4.202637931034483e-06, "loss": 0.2493, "step": 56275 }, { "epoch": 0.55, "grad_norm": 15.56647777557373, "learning_rate": 4.202206896551724e-06, "loss": 0.4385, "step": 56300 }, { "epoch": 0.55, "grad_norm": 8.76117992401123, "learning_rate": 4.2017758620689655e-06, "loss": 0.2559, "step": 56325 }, { "epoch": 0.55, "grad_norm": 20.92462730407715, "learning_rate": 4.201344827586207e-06, "loss": 0.4538, "step": 56350 }, { "epoch": 0.55, "grad_norm": 7.284182548522949, "learning_rate": 4.200913793103449e-06, "loss": 0.2651, "step": 56375 }, { "epoch": 0.55, "grad_norm": 12.896986961364746, "learning_rate": 4.20048275862069e-06, "loss": 0.4164, "step": 56400 }, { "epoch": 0.55, "grad_norm": 13.588196754455566, "learning_rate": 4.200051724137931e-06, "loss": 0.2431, "step": 56425 }, { "epoch": 0.56, "grad_norm": 14.633020401000977, "learning_rate": 4.199620689655173e-06, "loss": 0.4401, "step": 56450 }, { "epoch": 0.56, "grad_norm": 8.7359037399292, "learning_rate": 4.199189655172414e-06, "loss": 0.2402, "step": 56475 }, { "epoch": 0.56, "grad_norm": 19.299095153808594, "learning_rate": 4.198758620689656e-06, "loss": 0.4508, "step": 56500 }, { "epoch": 0.56, "grad_norm": 7.8663811683654785, "learning_rate": 4.198327586206897e-06, "loss": 0.2158, "step": 56525 }, { "epoch": 0.56, "grad_norm": 14.405794143676758, "learning_rate": 4.197896551724139e-06, "loss": 0.3774, "step": 56550 }, { "epoch": 0.56, "grad_norm": 9.860845565795898, "learning_rate": 4.19746551724138e-06, "loss": 0.2435, "step": 56575 }, { "epoch": 0.56, "grad_norm": 15.852493286132812, "learning_rate": 4.1970344827586215e-06, "loss": 0.3882, "step": 56600 }, { "epoch": 0.56, "grad_norm": 9.673674583435059, "learning_rate": 4.196603448275862e-06, "loss": 0.2326, "step": 56625 }, { "epoch": 0.56, "grad_norm": 21.207475662231445, "learning_rate": 4.1961724137931036e-06, "loss": 0.4565, "step": 56650 }, { "epoch": 0.56, "grad_norm": 10.709370613098145, "learning_rate": 4.195741379310345e-06, "loss": 0.2733, "step": 56675 }, { "epoch": 0.56, "grad_norm": 19.17057991027832, "learning_rate": 4.1953103448275865e-06, "loss": 0.415, "step": 56700 }, { "epoch": 0.56, "grad_norm": 8.13882827758789, "learning_rate": 4.194879310344828e-06, "loss": 0.2161, "step": 56725 }, { "epoch": 0.56, "grad_norm": 9.645058631896973, "learning_rate": 4.194448275862069e-06, "loss": 0.3735, "step": 56750 }, { "epoch": 0.56, "grad_norm": 13.3130521774292, "learning_rate": 4.194017241379311e-06, "loss": 0.2342, "step": 56775 }, { "epoch": 0.56, "grad_norm": 15.07535171508789, "learning_rate": 4.193586206896552e-06, "loss": 0.4612, "step": 56800 }, { "epoch": 0.56, "grad_norm": 10.872971534729004, "learning_rate": 4.193155172413794e-06, "loss": 0.2293, "step": 56825 }, { "epoch": 0.56, "grad_norm": 5.080417633056641, "learning_rate": 4.192724137931034e-06, "loss": 0.395, "step": 56850 }, { "epoch": 0.56, "grad_norm": 9.88762092590332, "learning_rate": 4.192293103448276e-06, "loss": 0.2177, "step": 56875 }, { "epoch": 0.56, "grad_norm": 22.63212013244629, "learning_rate": 4.191862068965517e-06, "loss": 0.3991, "step": 56900 }, { "epoch": 0.56, "grad_norm": 13.45644474029541, "learning_rate": 4.191431034482759e-06, "loss": 0.2055, "step": 56925 }, { "epoch": 0.56, "grad_norm": Infinity, "learning_rate": 4.19101724137931e-06, "loss": 0.4165, "step": 56950 }, { "epoch": 0.56, "grad_norm": 13.875142097473145, "learning_rate": 4.190586206896552e-06, "loss": 0.2196, "step": 56975 }, { "epoch": 0.56, "grad_norm": 32.49763488769531, "learning_rate": 4.190155172413793e-06, "loss": 0.4032, "step": 57000 }, { "epoch": 0.56, "grad_norm": 15.940385818481445, "learning_rate": 4.1897241379310346e-06, "loss": 0.2271, "step": 57025 }, { "epoch": 0.56, "grad_norm": 14.146900177001953, "learning_rate": 4.189293103448276e-06, "loss": 0.4719, "step": 57050 }, { "epoch": 0.56, "grad_norm": 6.232107162475586, "learning_rate": 4.1888620689655175e-06, "loss": 0.1478, "step": 57075 }, { "epoch": 0.56, "grad_norm": Infinity, "learning_rate": 4.188448275862069e-06, "loss": 0.4372, "step": 57100 }, { "epoch": 0.56, "grad_norm": 7.261572360992432, "learning_rate": 4.18801724137931e-06, "loss": 0.2081, "step": 57125 }, { "epoch": 0.56, "grad_norm": 20.834238052368164, "learning_rate": 4.187586206896552e-06, "loss": 0.3898, "step": 57150 }, { "epoch": 0.56, "grad_norm": 13.545828819274902, "learning_rate": 4.187155172413793e-06, "loss": 0.2433, "step": 57175 }, { "epoch": 0.56, "grad_norm": 17.672412872314453, "learning_rate": 4.186724137931035e-06, "loss": 0.4857, "step": 57200 }, { "epoch": 0.56, "grad_norm": 8.920010566711426, "learning_rate": 4.186293103448276e-06, "loss": 0.2159, "step": 57225 }, { "epoch": 0.56, "grad_norm": 16.06743621826172, "learning_rate": 4.185862068965518e-06, "loss": 0.4625, "step": 57250 }, { "epoch": 0.56, "grad_norm": 18.321414947509766, "learning_rate": 4.185431034482759e-06, "loss": 0.2441, "step": 57275 }, { "epoch": 0.56, "grad_norm": 16.33292007446289, "learning_rate": 4.185000000000001e-06, "loss": 0.3765, "step": 57300 }, { "epoch": 0.56, "grad_norm": 13.69056224822998, "learning_rate": 4.184568965517242e-06, "loss": 0.2287, "step": 57325 }, { "epoch": 0.56, "grad_norm": 21.588306427001953, "learning_rate": 4.1841379310344835e-06, "loss": 0.4041, "step": 57350 }, { "epoch": 0.56, "grad_norm": 8.774287223815918, "learning_rate": 4.183706896551725e-06, "loss": 0.228, "step": 57375 }, { "epoch": 0.56, "grad_norm": 12.679067611694336, "learning_rate": 4.1832758620689656e-06, "loss": 0.3047, "step": 57400 }, { "epoch": 0.56, "grad_norm": 14.87093734741211, "learning_rate": 4.182844827586207e-06, "loss": 0.2152, "step": 57425 }, { "epoch": 0.57, "grad_norm": 18.62353515625, "learning_rate": 4.1824137931034485e-06, "loss": 0.4696, "step": 57450 }, { "epoch": 0.57, "grad_norm": 10.278468132019043, "learning_rate": 4.18198275862069e-06, "loss": 0.235, "step": 57475 }, { "epoch": 0.57, "grad_norm": 12.933960914611816, "learning_rate": 4.181551724137931e-06, "loss": 0.4764, "step": 57500 }, { "epoch": 0.57, "grad_norm": 11.878569602966309, "learning_rate": 4.181120689655173e-06, "loss": 0.2038, "step": 57525 }, { "epoch": 0.57, "grad_norm": 14.09988021850586, "learning_rate": 4.180689655172414e-06, "loss": 0.4702, "step": 57550 }, { "epoch": 0.57, "grad_norm": 10.3616361618042, "learning_rate": 4.180258620689656e-06, "loss": 0.2164, "step": 57575 }, { "epoch": 0.57, "grad_norm": 21.496509552001953, "learning_rate": 4.179827586206897e-06, "loss": 0.4206, "step": 57600 }, { "epoch": 0.57, "grad_norm": 10.255602836608887, "learning_rate": 4.179396551724138e-06, "loss": 0.1851, "step": 57625 }, { "epoch": 0.57, "grad_norm": 17.813467025756836, "learning_rate": 4.178965517241379e-06, "loss": 0.3793, "step": 57650 }, { "epoch": 0.57, "grad_norm": 12.046916007995605, "learning_rate": 4.178534482758621e-06, "loss": 0.2683, "step": 57675 }, { "epoch": 0.57, "grad_norm": 23.750459671020508, "learning_rate": 4.178103448275862e-06, "loss": 0.3882, "step": 57700 }, { "epoch": 0.57, "grad_norm": 10.832592010498047, "learning_rate": 4.177672413793104e-06, "loss": 0.2298, "step": 57725 }, { "epoch": 0.57, "grad_norm": 19.959430694580078, "learning_rate": 4.177241379310345e-06, "loss": 0.3862, "step": 57750 }, { "epoch": 0.57, "grad_norm": 10.570462226867676, "learning_rate": 4.1768103448275866e-06, "loss": 0.2331, "step": 57775 }, { "epoch": 0.57, "grad_norm": 20.319971084594727, "learning_rate": 4.176379310344828e-06, "loss": 0.4066, "step": 57800 }, { "epoch": 0.57, "grad_norm": 5.626487731933594, "learning_rate": 4.1759482758620695e-06, "loss": 0.2168, "step": 57825 }, { "epoch": 0.57, "grad_norm": 21.60234260559082, "learning_rate": 4.17551724137931e-06, "loss": 0.3886, "step": 57850 }, { "epoch": 0.57, "grad_norm": 4.878609657287598, "learning_rate": 4.175086206896552e-06, "loss": 0.2056, "step": 57875 }, { "epoch": 0.57, "grad_norm": 14.639880180358887, "learning_rate": 4.174655172413794e-06, "loss": 0.3695, "step": 57900 }, { "epoch": 0.57, "grad_norm": 6.4188551902771, "learning_rate": 4.174224137931035e-06, "loss": 0.2245, "step": 57925 }, { "epoch": 0.57, "grad_norm": 23.08250617980957, "learning_rate": 4.173793103448276e-06, "loss": 0.3819, "step": 57950 }, { "epoch": 0.57, "grad_norm": 7.589394569396973, "learning_rate": 4.173362068965517e-06, "loss": 0.2317, "step": 57975 }, { "epoch": 0.57, "grad_norm": 5.46632719039917, "learning_rate": 4.172931034482759e-06, "loss": 0.325, "step": 58000 }, { "epoch": 0.57, "grad_norm": 10.518787384033203, "learning_rate": 4.1725e-06, "loss": 0.2216, "step": 58025 }, { "epoch": 0.57, "grad_norm": 18.550369262695312, "learning_rate": 4.172068965517242e-06, "loss": 0.4256, "step": 58050 }, { "epoch": 0.57, "grad_norm": 12.175277709960938, "learning_rate": 4.171637931034483e-06, "loss": 0.2056, "step": 58075 }, { "epoch": 0.57, "grad_norm": 19.973388671875, "learning_rate": 4.171206896551725e-06, "loss": 0.4862, "step": 58100 }, { "epoch": 0.57, "grad_norm": 8.652534484863281, "learning_rate": 4.170775862068966e-06, "loss": 0.1998, "step": 58125 }, { "epoch": 0.57, "grad_norm": 6.465892791748047, "learning_rate": 4.1703448275862076e-06, "loss": 0.392, "step": 58150 }, { "epoch": 0.57, "grad_norm": 7.930947303771973, "learning_rate": 4.169913793103448e-06, "loss": 0.1594, "step": 58175 }, { "epoch": 0.57, "grad_norm": 18.75818634033203, "learning_rate": 4.16948275862069e-06, "loss": 0.4075, "step": 58200 }, { "epoch": 0.57, "grad_norm": 13.46275806427002, "learning_rate": 4.169051724137931e-06, "loss": 0.2059, "step": 58225 }, { "epoch": 0.57, "grad_norm": 12.463970184326172, "learning_rate": 4.1686206896551725e-06, "loss": 0.406, "step": 58250 }, { "epoch": 0.57, "grad_norm": 4.58974552154541, "learning_rate": 4.168189655172414e-06, "loss": 0.1764, "step": 58275 }, { "epoch": 0.57, "grad_norm": 12.944082260131836, "learning_rate": 4.1677586206896555e-06, "loss": 0.3709, "step": 58300 }, { "epoch": 0.57, "grad_norm": 15.340627670288086, "learning_rate": 4.167327586206897e-06, "loss": 0.2369, "step": 58325 }, { "epoch": 0.57, "grad_norm": 17.78238868713379, "learning_rate": 4.166896551724138e-06, "loss": 0.4801, "step": 58350 }, { "epoch": 0.57, "grad_norm": 12.104658126831055, "learning_rate": 4.16646551724138e-06, "loss": 0.2572, "step": 58375 }, { "epoch": 0.57, "grad_norm": 19.74496841430664, "learning_rate": 4.1660344827586204e-06, "loss": 0.4786, "step": 58400 }, { "epoch": 0.57, "grad_norm": 15.355125427246094, "learning_rate": 4.165603448275862e-06, "loss": 0.2324, "step": 58425 }, { "epoch": 0.57, "grad_norm": 21.466110229492188, "learning_rate": 4.165172413793104e-06, "loss": 0.3791, "step": 58450 }, { "epoch": 0.58, "grad_norm": 9.343445777893066, "learning_rate": 4.164741379310346e-06, "loss": 0.1967, "step": 58475 }, { "epoch": 0.58, "grad_norm": 18.944927215576172, "learning_rate": 4.164310344827587e-06, "loss": 0.4097, "step": 58500 }, { "epoch": 0.58, "grad_norm": 17.671735763549805, "learning_rate": 4.163879310344828e-06, "loss": 0.2438, "step": 58525 }, { "epoch": 0.58, "grad_norm": 16.15230369567871, "learning_rate": 4.163448275862069e-06, "loss": 0.4583, "step": 58550 }, { "epoch": 0.58, "grad_norm": 7.40202522277832, "learning_rate": 4.163017241379311e-06, "loss": 0.245, "step": 58575 }, { "epoch": 0.58, "grad_norm": 3.8617756366729736, "learning_rate": 4.162586206896552e-06, "loss": 0.3756, "step": 58600 }, { "epoch": 0.58, "grad_norm": 8.321680068969727, "learning_rate": 4.1621551724137935e-06, "loss": 0.2553, "step": 58625 }, { "epoch": 0.58, "grad_norm": 25.635475158691406, "learning_rate": 4.161724137931035e-06, "loss": 0.4015, "step": 58650 }, { "epoch": 0.58, "grad_norm": 6.644644737243652, "learning_rate": 4.1612931034482765e-06, "loss": 0.2601, "step": 58675 }, { "epoch": 0.58, "grad_norm": 14.601767539978027, "learning_rate": 4.160862068965518e-06, "loss": 0.4155, "step": 58700 }, { "epoch": 0.58, "grad_norm": 13.136856079101562, "learning_rate": 4.160431034482759e-06, "loss": 0.199, "step": 58725 }, { "epoch": 0.58, "grad_norm": 16.585418701171875, "learning_rate": 4.16e-06, "loss": 0.3809, "step": 58750 }, { "epoch": 0.58, "grad_norm": 8.344559669494629, "learning_rate": 4.1595689655172414e-06, "loss": 0.2712, "step": 58775 }, { "epoch": 0.58, "grad_norm": 16.45542335510254, "learning_rate": 4.159137931034483e-06, "loss": 0.4604, "step": 58800 }, { "epoch": 0.58, "grad_norm": 9.512733459472656, "learning_rate": 4.158706896551724e-06, "loss": 0.1983, "step": 58825 }, { "epoch": 0.58, "grad_norm": 12.896279335021973, "learning_rate": 4.158275862068966e-06, "loss": 0.3797, "step": 58850 }, { "epoch": 0.58, "grad_norm": 13.027125358581543, "learning_rate": 4.157844827586207e-06, "loss": 0.2303, "step": 58875 }, { "epoch": 0.58, "grad_norm": 17.98148536682129, "learning_rate": 4.157413793103449e-06, "loss": 0.4191, "step": 58900 }, { "epoch": 0.58, "grad_norm": 16.14866065979004, "learning_rate": 4.15698275862069e-06, "loss": 0.2417, "step": 58925 }, { "epoch": 0.58, "grad_norm": 16.49949073791504, "learning_rate": 4.156551724137932e-06, "loss": 0.4092, "step": 58950 }, { "epoch": 0.58, "grad_norm": 2.6713507175445557, "learning_rate": 4.156120689655172e-06, "loss": 0.2063, "step": 58975 }, { "epoch": 0.58, "grad_norm": 15.551740646362305, "learning_rate": 4.155689655172414e-06, "loss": 0.3945, "step": 59000 }, { "epoch": 0.58, "grad_norm": 8.20316219329834, "learning_rate": 4.155258620689655e-06, "loss": 0.2131, "step": 59025 }, { "epoch": 0.58, "grad_norm": 17.079029083251953, "learning_rate": 4.1548275862068975e-06, "loss": 0.4278, "step": 59050 }, { "epoch": 0.58, "grad_norm": 8.406177520751953, "learning_rate": 4.154396551724138e-06, "loss": 0.2207, "step": 59075 }, { "epoch": 0.58, "grad_norm": 13.466768264770508, "learning_rate": 4.1539655172413795e-06, "loss": 0.413, "step": 59100 }, { "epoch": 0.58, "grad_norm": 11.053849220275879, "learning_rate": 4.153534482758621e-06, "loss": 0.2327, "step": 59125 }, { "epoch": 0.58, "grad_norm": 10.998483657836914, "learning_rate": 4.1531034482758624e-06, "loss": 0.3607, "step": 59150 }, { "epoch": 0.58, "grad_norm": 5.945724964141846, "learning_rate": 4.152672413793104e-06, "loss": 0.2272, "step": 59175 }, { "epoch": 0.58, "grad_norm": 8.840084075927734, "learning_rate": 4.152241379310345e-06, "loss": 0.3966, "step": 59200 }, { "epoch": 0.58, "grad_norm": 12.020674705505371, "learning_rate": 4.151810344827587e-06, "loss": 0.2526, "step": 59225 }, { "epoch": 0.58, "grad_norm": 18.76689910888672, "learning_rate": 4.151379310344828e-06, "loss": 0.4197, "step": 59250 }, { "epoch": 0.58, "grad_norm": 6.510401248931885, "learning_rate": 4.15094827586207e-06, "loss": 0.2391, "step": 59275 }, { "epoch": 0.58, "grad_norm": 12.397891998291016, "learning_rate": 4.15051724137931e-06, "loss": 0.3441, "step": 59300 }, { "epoch": 0.58, "grad_norm": 11.664587020874023, "learning_rate": 4.150086206896552e-06, "loss": 0.1954, "step": 59325 }, { "epoch": 0.58, "grad_norm": 16.51839828491211, "learning_rate": 4.149655172413793e-06, "loss": 0.3447, "step": 59350 }, { "epoch": 0.58, "grad_norm": 10.628759384155273, "learning_rate": 4.149224137931035e-06, "loss": 0.2142, "step": 59375 }, { "epoch": 0.58, "grad_norm": 12.049683570861816, "learning_rate": 4.148793103448276e-06, "loss": 0.3918, "step": 59400 }, { "epoch": 0.58, "grad_norm": 12.281694412231445, "learning_rate": 4.148362068965518e-06, "loss": 0.2246, "step": 59425 }, { "epoch": 0.58, "grad_norm": 24.60432243347168, "learning_rate": 4.147931034482759e-06, "loss": 0.3818, "step": 59450 }, { "epoch": 0.58, "grad_norm": 15.324037551879883, "learning_rate": 4.1475000000000005e-06, "loss": 0.198, "step": 59475 }, { "epoch": 0.59, "grad_norm": 35.296836853027344, "learning_rate": 4.147068965517242e-06, "loss": 0.3909, "step": 59500 }, { "epoch": 0.59, "grad_norm": 15.01140308380127, "learning_rate": 4.146637931034483e-06, "loss": 0.2184, "step": 59525 }, { "epoch": 0.59, "grad_norm": 13.294367790222168, "learning_rate": 4.146206896551724e-06, "loss": 0.3642, "step": 59550 }, { "epoch": 0.59, "grad_norm": 15.436156272888184, "learning_rate": 4.1457758620689655e-06, "loss": 0.2022, "step": 59575 }, { "epoch": 0.59, "grad_norm": 22.74988555908203, "learning_rate": 4.145344827586207e-06, "loss": 0.4192, "step": 59600 }, { "epoch": 0.59, "grad_norm": 6.124560356140137, "learning_rate": 4.144913793103449e-06, "loss": 0.2157, "step": 59625 }, { "epoch": 0.59, "grad_norm": 14.38127326965332, "learning_rate": 4.14448275862069e-06, "loss": 0.3815, "step": 59650 }, { "epoch": 0.59, "grad_norm": 7.482415199279785, "learning_rate": 4.144051724137931e-06, "loss": 0.2776, "step": 59675 }, { "epoch": 0.59, "grad_norm": 28.0313663482666, "learning_rate": 4.143620689655173e-06, "loss": 0.4642, "step": 59700 }, { "epoch": 0.59, "grad_norm": 15.228941917419434, "learning_rate": 4.143189655172414e-06, "loss": 0.2186, "step": 59725 }, { "epoch": 0.59, "grad_norm": 17.44774627685547, "learning_rate": 4.142758620689656e-06, "loss": 0.3942, "step": 59750 }, { "epoch": 0.59, "grad_norm": 14.631536483764648, "learning_rate": 4.142327586206897e-06, "loss": 0.2264, "step": 59775 }, { "epoch": 0.59, "grad_norm": 12.44413948059082, "learning_rate": 4.141896551724139e-06, "loss": 0.3604, "step": 59800 }, { "epoch": 0.59, "grad_norm": 13.751614570617676, "learning_rate": 4.14146551724138e-06, "loss": 0.2221, "step": 59825 }, { "epoch": 0.59, "grad_norm": 12.326183319091797, "learning_rate": 4.1410344827586215e-06, "loss": 0.3585, "step": 59850 }, { "epoch": 0.59, "grad_norm": 9.938019752502441, "learning_rate": 4.140603448275862e-06, "loss": 0.202, "step": 59875 }, { "epoch": 0.59, "grad_norm": 13.772727012634277, "learning_rate": 4.140172413793104e-06, "loss": 0.3987, "step": 59900 }, { "epoch": 0.59, "grad_norm": 9.487245559692383, "learning_rate": 4.139741379310345e-06, "loss": 0.216, "step": 59925 }, { "epoch": 0.59, "grad_norm": 13.357712745666504, "learning_rate": 4.1393103448275865e-06, "loss": 0.3559, "step": 59950 }, { "epoch": 0.59, "grad_norm": 6.3772807121276855, "learning_rate": 4.138879310344828e-06, "loss": 0.2585, "step": 59975 }, { "epoch": 0.59, "grad_norm": 16.839859008789062, "learning_rate": 4.138448275862069e-06, "loss": 0.481, "step": 60000 }, { "epoch": 0.59, "eval_loss": 0.46282958984375, "eval_runtime": 5865.5608, "eval_samples_per_second": 1.614, "eval_steps_per_second": 0.202, "eval_wer": 0.14659112856499065, "step": 60000 }, { "epoch": 0.59, "grad_norm": 8.525212287902832, "learning_rate": 4.138017241379311e-06, "loss": 0.1775, "step": 60025 }, { "epoch": 0.59, "grad_norm": 15.016844749450684, "learning_rate": 4.137586206896552e-06, "loss": 0.4105, "step": 60050 }, { "epoch": 0.59, "grad_norm": 9.794632911682129, "learning_rate": 4.137155172413794e-06, "loss": 0.2381, "step": 60075 }, { "epoch": 0.59, "grad_norm": 15.76549243927002, "learning_rate": 4.136724137931034e-06, "loss": 0.3559, "step": 60100 }, { "epoch": 0.59, "grad_norm": 7.246084690093994, "learning_rate": 4.136293103448276e-06, "loss": 0.2725, "step": 60125 }, { "epoch": 0.59, "grad_norm": 16.645797729492188, "learning_rate": 4.135862068965517e-06, "loss": 0.3949, "step": 60150 }, { "epoch": 0.59, "grad_norm": 18.34212875366211, "learning_rate": 4.135431034482759e-06, "loss": 0.2731, "step": 60175 }, { "epoch": 0.59, "grad_norm": 24.963623046875, "learning_rate": 4.135e-06, "loss": 0.3531, "step": 60200 }, { "epoch": 0.59, "grad_norm": 8.208566665649414, "learning_rate": 4.134568965517242e-06, "loss": 0.2536, "step": 60225 }, { "epoch": 0.59, "grad_norm": 18.401412963867188, "learning_rate": 4.134137931034483e-06, "loss": 0.4483, "step": 60250 }, { "epoch": 0.59, "grad_norm": 8.731658935546875, "learning_rate": 4.133706896551725e-06, "loss": 0.2083, "step": 60275 }, { "epoch": 0.59, "grad_norm": 19.85618019104004, "learning_rate": 4.133275862068965e-06, "loss": 0.3515, "step": 60300 }, { "epoch": 0.59, "grad_norm": 10.240216255187988, "learning_rate": 4.1328448275862075e-06, "loss": 0.2197, "step": 60325 }, { "epoch": 0.59, "grad_norm": 14.061461448669434, "learning_rate": 4.132413793103449e-06, "loss": 0.3604, "step": 60350 }, { "epoch": 0.59, "grad_norm": 7.999539375305176, "learning_rate": 4.13198275862069e-06, "loss": 0.2466, "step": 60375 }, { "epoch": 0.59, "grad_norm": 21.44745445251465, "learning_rate": 4.131551724137932e-06, "loss": 0.4453, "step": 60400 }, { "epoch": 0.59, "grad_norm": 11.073153495788574, "learning_rate": 4.1311206896551725e-06, "loss": 0.2025, "step": 60425 }, { "epoch": 0.59, "grad_norm": 25.46859359741211, "learning_rate": 4.130689655172414e-06, "loss": 0.4618, "step": 60450 }, { "epoch": 0.59, "grad_norm": 6.7349534034729, "learning_rate": 4.130258620689655e-06, "loss": 0.1973, "step": 60475 }, { "epoch": 0.6, "grad_norm": 11.762961387634277, "learning_rate": 4.129827586206897e-06, "loss": 0.4176, "step": 60500 }, { "epoch": 0.6, "grad_norm": 11.035311698913574, "learning_rate": 4.129396551724138e-06, "loss": 0.253, "step": 60525 }, { "epoch": 0.6, "grad_norm": 15.330416679382324, "learning_rate": 4.12896551724138e-06, "loss": 0.4112, "step": 60550 }, { "epoch": 0.6, "grad_norm": 8.847260475158691, "learning_rate": 4.128534482758621e-06, "loss": 0.2022, "step": 60575 }, { "epoch": 0.6, "grad_norm": 13.402334213256836, "learning_rate": 4.128103448275863e-06, "loss": 0.4291, "step": 60600 }, { "epoch": 0.6, "grad_norm": 3.4930641651153564, "learning_rate": 4.127672413793104e-06, "loss": 0.2118, "step": 60625 }, { "epoch": 0.6, "grad_norm": 17.089324951171875, "learning_rate": 4.127241379310345e-06, "loss": 0.4571, "step": 60650 }, { "epoch": 0.6, "grad_norm": 9.832658767700195, "learning_rate": 4.126810344827586e-06, "loss": 0.2407, "step": 60675 }, { "epoch": 0.6, "grad_norm": 16.770660400390625, "learning_rate": 4.126379310344828e-06, "loss": 0.3731, "step": 60700 }, { "epoch": 0.6, "grad_norm": 7.604823589324951, "learning_rate": 4.125948275862069e-06, "loss": 0.2255, "step": 60725 }, { "epoch": 0.6, "grad_norm": 7.309975624084473, "learning_rate": 4.1255172413793106e-06, "loss": 0.4409, "step": 60750 }, { "epoch": 0.6, "grad_norm": 8.137125015258789, "learning_rate": 4.125086206896552e-06, "loss": 0.2102, "step": 60775 }, { "epoch": 0.6, "grad_norm": 13.206509590148926, "learning_rate": 4.1246551724137935e-06, "loss": 0.4109, "step": 60800 }, { "epoch": 0.6, "grad_norm": 5.361893653869629, "learning_rate": 4.124224137931035e-06, "loss": 0.2033, "step": 60825 }, { "epoch": 0.6, "grad_norm": 15.875036239624023, "learning_rate": 4.123793103448276e-06, "loss": 0.3656, "step": 60850 }, { "epoch": 0.6, "grad_norm": 6.723869800567627, "learning_rate": 4.123362068965517e-06, "loss": 0.2178, "step": 60875 }, { "epoch": 0.6, "grad_norm": 16.01669692993164, "learning_rate": 4.122931034482759e-06, "loss": 0.4361, "step": 60900 }, { "epoch": 0.6, "grad_norm": 11.361605644226074, "learning_rate": 4.122500000000001e-06, "loss": 0.2235, "step": 60925 }, { "epoch": 0.6, "grad_norm": 25.310752868652344, "learning_rate": 4.122068965517242e-06, "loss": 0.4148, "step": 60950 }, { "epoch": 0.6, "grad_norm": 12.489757537841797, "learning_rate": 4.121637931034483e-06, "loss": 0.2444, "step": 60975 }, { "epoch": 0.6, "grad_norm": 15.210195541381836, "learning_rate": 4.121206896551724e-06, "loss": 0.4048, "step": 61000 }, { "epoch": 0.6, "grad_norm": 7.918449401855469, "learning_rate": 4.120775862068966e-06, "loss": 0.2088, "step": 61025 }, { "epoch": 0.6, "grad_norm": 14.162789344787598, "learning_rate": 4.120344827586207e-06, "loss": 0.3592, "step": 61050 }, { "epoch": 0.6, "grad_norm": 5.31602668762207, "learning_rate": 4.119913793103449e-06, "loss": 0.2272, "step": 61075 }, { "epoch": 0.6, "grad_norm": 19.139284133911133, "learning_rate": 4.11948275862069e-06, "loss": 0.3623, "step": 61100 }, { "epoch": 0.6, "grad_norm": 7.707322120666504, "learning_rate": 4.1190517241379316e-06, "loss": 0.2338, "step": 61125 }, { "epoch": 0.6, "grad_norm": 24.61988067626953, "learning_rate": 4.118620689655173e-06, "loss": 0.4454, "step": 61150 }, { "epoch": 0.6, "grad_norm": 8.189289093017578, "learning_rate": 4.1181896551724145e-06, "loss": 0.2657, "step": 61175 }, { "epoch": 0.6, "grad_norm": 20.920473098754883, "learning_rate": 4.117775862068966e-06, "loss": 0.3847, "step": 61200 }, { "epoch": 0.6, "grad_norm": 14.908019065856934, "learning_rate": 4.117344827586207e-06, "loss": 0.2904, "step": 61225 }, { "epoch": 0.6, "grad_norm": 21.480209350585938, "learning_rate": 4.116913793103449e-06, "loss": 0.3867, "step": 61250 }, { "epoch": 0.6, "grad_norm": 11.070099830627441, "learning_rate": 4.11648275862069e-06, "loss": 0.2362, "step": 61275 }, { "epoch": 0.6, "grad_norm": 20.3751220703125, "learning_rate": 4.116051724137932e-06, "loss": 0.4154, "step": 61300 }, { "epoch": 0.6, "grad_norm": 9.781108856201172, "learning_rate": 4.115620689655172e-06, "loss": 0.2174, "step": 61325 }, { "epoch": 0.6, "grad_norm": 14.464882850646973, "learning_rate": 4.115189655172414e-06, "loss": 0.4224, "step": 61350 }, { "epoch": 0.6, "grad_norm": 9.168455123901367, "learning_rate": 4.114758620689655e-06, "loss": 0.2156, "step": 61375 }, { "epoch": 0.6, "grad_norm": 25.47555923461914, "learning_rate": 4.114327586206897e-06, "loss": 0.4239, "step": 61400 }, { "epoch": 0.6, "grad_norm": 7.135481834411621, "learning_rate": 4.113896551724138e-06, "loss": 0.2223, "step": 61425 }, { "epoch": 0.6, "grad_norm": 13.54316520690918, "learning_rate": 4.11346551724138e-06, "loss": 0.4264, "step": 61450 }, { "epoch": 0.6, "grad_norm": 2.643087863922119, "learning_rate": 4.113034482758621e-06, "loss": 0.2031, "step": 61475 }, { "epoch": 0.6, "grad_norm": 16.825416564941406, "learning_rate": 4.1126034482758626e-06, "loss": 0.4226, "step": 61500 }, { "epoch": 0.61, "grad_norm": 7.186704635620117, "learning_rate": 4.112172413793104e-06, "loss": 0.2677, "step": 61525 }, { "epoch": 0.61, "grad_norm": 27.31147003173828, "learning_rate": 4.111741379310345e-06, "loss": 0.424, "step": 61550 }, { "epoch": 0.61, "grad_norm": 9.774738311767578, "learning_rate": 4.111310344827586e-06, "loss": 0.1916, "step": 61575 }, { "epoch": 0.61, "grad_norm": 29.64301109313965, "learning_rate": 4.110879310344828e-06, "loss": 0.4237, "step": 61600 }, { "epoch": 0.61, "grad_norm": 11.56531810760498, "learning_rate": 4.11044827586207e-06, "loss": 0.2219, "step": 61625 }, { "epoch": 0.61, "grad_norm": 15.693480491638184, "learning_rate": 4.1100172413793105e-06, "loss": 0.4175, "step": 61650 }, { "epoch": 0.61, "grad_norm": 7.599756240844727, "learning_rate": 4.109586206896552e-06, "loss": 0.2013, "step": 61675 }, { "epoch": 0.61, "grad_norm": 17.011396408081055, "learning_rate": 4.109155172413793e-06, "loss": 0.3354, "step": 61700 }, { "epoch": 0.61, "grad_norm": 9.501593589782715, "learning_rate": 4.108724137931035e-06, "loss": 0.2111, "step": 61725 }, { "epoch": 0.61, "grad_norm": 20.64093589782715, "learning_rate": 4.108293103448276e-06, "loss": 0.3849, "step": 61750 }, { "epoch": 0.61, "grad_norm": 12.422394752502441, "learning_rate": 4.107862068965518e-06, "loss": 0.2856, "step": 61775 }, { "epoch": 0.61, "grad_norm": 17.291894912719727, "learning_rate": 4.107431034482759e-06, "loss": 0.3954, "step": 61800 }, { "epoch": 0.61, "grad_norm": 6.953711032867432, "learning_rate": 4.107000000000001e-06, "loss": 0.2178, "step": 61825 }, { "epoch": 0.61, "grad_norm": 22.88322639465332, "learning_rate": 4.106568965517242e-06, "loss": 0.4331, "step": 61850 }, { "epoch": 0.61, "grad_norm": 4.406961917877197, "learning_rate": 4.106137931034483e-06, "loss": 0.1919, "step": 61875 }, { "epoch": 0.61, "grad_norm": 19.698822021484375, "learning_rate": 4.105706896551724e-06, "loss": 0.4336, "step": 61900 }, { "epoch": 0.61, "grad_norm": 14.851612091064453, "learning_rate": 4.105275862068966e-06, "loss": 0.21, "step": 61925 }, { "epoch": 0.61, "grad_norm": 20.76719093322754, "learning_rate": 4.104844827586207e-06, "loss": 0.4167, "step": 61950 }, { "epoch": 0.61, "grad_norm": 12.735614776611328, "learning_rate": 4.1044137931034485e-06, "loss": 0.1887, "step": 61975 }, { "epoch": 0.61, "grad_norm": 22.009262084960938, "learning_rate": 4.10398275862069e-06, "loss": 0.4861, "step": 62000 }, { "epoch": 0.61, "grad_norm": 10.16843032836914, "learning_rate": 4.1035517241379315e-06, "loss": 0.2225, "step": 62025 }, { "epoch": 0.61, "grad_norm": 16.845970153808594, "learning_rate": 4.103120689655173e-06, "loss": 0.4297, "step": 62050 }, { "epoch": 0.61, "grad_norm": 8.269804000854492, "learning_rate": 4.102689655172414e-06, "loss": 0.1836, "step": 62075 }, { "epoch": 0.61, "grad_norm": 24.009828567504883, "learning_rate": 4.102258620689655e-06, "loss": 0.4503, "step": 62100 }, { "epoch": 0.61, "grad_norm": 9.886873245239258, "learning_rate": 4.1018275862068964e-06, "loss": 0.2102, "step": 62125 }, { "epoch": 0.61, "grad_norm": 16.32060432434082, "learning_rate": 4.101396551724138e-06, "loss": 0.3513, "step": 62150 }, { "epoch": 0.61, "grad_norm": 4.905256748199463, "learning_rate": 4.10096551724138e-06, "loss": 0.2326, "step": 62175 }, { "epoch": 0.61, "grad_norm": 11.80701732635498, "learning_rate": 4.100534482758621e-06, "loss": 0.3473, "step": 62200 }, { "epoch": 0.61, "grad_norm": 5.958581924438477, "learning_rate": 4.100103448275862e-06, "loss": 0.2025, "step": 62225 }, { "epoch": 0.61, "grad_norm": 15.846867561340332, "learning_rate": 4.099672413793104e-06, "loss": 0.3862, "step": 62250 }, { "epoch": 0.61, "grad_norm": 4.281677722930908, "learning_rate": 4.099241379310345e-06, "loss": 0.2206, "step": 62275 }, { "epoch": 0.61, "grad_norm": 10.633455276489258, "learning_rate": 4.098810344827587e-06, "loss": 0.3578, "step": 62300 }, { "epoch": 0.61, "grad_norm": 14.203116416931152, "learning_rate": 4.098379310344828e-06, "loss": 0.227, "step": 62325 }, { "epoch": 0.61, "grad_norm": 15.6585054397583, "learning_rate": 4.0979482758620695e-06, "loss": 0.3472, "step": 62350 }, { "epoch": 0.61, "grad_norm": 6.420027732849121, "learning_rate": 4.097517241379311e-06, "loss": 0.2296, "step": 62375 }, { "epoch": 0.61, "grad_norm": 19.13623809814453, "learning_rate": 4.0970862068965525e-06, "loss": 0.3362, "step": 62400 }, { "epoch": 0.61, "grad_norm": 12.908347129821777, "learning_rate": 4.096655172413793e-06, "loss": 0.1653, "step": 62425 }, { "epoch": 0.61, "grad_norm": 24.067766189575195, "learning_rate": 4.0962241379310345e-06, "loss": 0.3503, "step": 62450 }, { "epoch": 0.61, "grad_norm": 8.465949058532715, "learning_rate": 4.095793103448276e-06, "loss": 0.2326, "step": 62475 }, { "epoch": 0.61, "grad_norm": 58.10072326660156, "learning_rate": 4.0953620689655174e-06, "loss": 0.4085, "step": 62500 }, { "epoch": 0.61, "grad_norm": 9.587092399597168, "learning_rate": 4.094931034482759e-06, "loss": 0.2065, "step": 62525 }, { "epoch": 0.62, "grad_norm": 21.389684677124023, "learning_rate": 4.0945e-06, "loss": 0.3545, "step": 62550 }, { "epoch": 0.62, "grad_norm": 15.035667419433594, "learning_rate": 4.094068965517242e-06, "loss": 0.2162, "step": 62575 }, { "epoch": 0.62, "grad_norm": 13.388468742370605, "learning_rate": 4.093637931034483e-06, "loss": 0.4031, "step": 62600 }, { "epoch": 0.62, "grad_norm": 12.617664337158203, "learning_rate": 4.093206896551725e-06, "loss": 0.2714, "step": 62625 }, { "epoch": 0.62, "grad_norm": 18.865556716918945, "learning_rate": 4.092775862068965e-06, "loss": 0.4129, "step": 62650 }, { "epoch": 0.62, "grad_norm": 10.296509742736816, "learning_rate": 4.092344827586207e-06, "loss": 0.2457, "step": 62675 }, { "epoch": 0.62, "grad_norm": 13.7154541015625, "learning_rate": 4.091913793103448e-06, "loss": 0.372, "step": 62700 }, { "epoch": 0.62, "grad_norm": 5.608311176300049, "learning_rate": 4.09148275862069e-06, "loss": 0.2664, "step": 62725 }, { "epoch": 0.62, "grad_norm": 17.84773826599121, "learning_rate": 4.091051724137932e-06, "loss": 0.4369, "step": 62750 }, { "epoch": 0.62, "grad_norm": 10.031953811645508, "learning_rate": 4.090620689655173e-06, "loss": 0.2028, "step": 62775 }, { "epoch": 0.62, "grad_norm": 20.498647689819336, "learning_rate": 4.090189655172414e-06, "loss": 0.4026, "step": 62800 }, { "epoch": 0.62, "grad_norm": 6.603042125701904, "learning_rate": 4.0897586206896555e-06, "loss": 0.1939, "step": 62825 }, { "epoch": 0.62, "grad_norm": 17.88648223876953, "learning_rate": 4.089327586206897e-06, "loss": 0.3795, "step": 62850 }, { "epoch": 0.62, "grad_norm": 6.785904407501221, "learning_rate": 4.0888965517241384e-06, "loss": 0.2461, "step": 62875 }, { "epoch": 0.62, "grad_norm": 18.551389694213867, "learning_rate": 4.08846551724138e-06, "loss": 0.3671, "step": 62900 }, { "epoch": 0.62, "grad_norm": 10.444602966308594, "learning_rate": 4.088034482758621e-06, "loss": 0.2459, "step": 62925 }, { "epoch": 0.62, "grad_norm": 22.094743728637695, "learning_rate": 4.087603448275863e-06, "loss": 0.3982, "step": 62950 }, { "epoch": 0.62, "grad_norm": 10.272940635681152, "learning_rate": 4.087172413793104e-06, "loss": 0.1922, "step": 62975 }, { "epoch": 0.62, "grad_norm": 14.980732917785645, "learning_rate": 4.086741379310345e-06, "loss": 0.3796, "step": 63000 }, { "epoch": 0.62, "grad_norm": 7.7369208335876465, "learning_rate": 4.086310344827586e-06, "loss": 0.2393, "step": 63025 }, { "epoch": 0.62, "grad_norm": 18.029460906982422, "learning_rate": 4.085879310344828e-06, "loss": 0.375, "step": 63050 }, { "epoch": 0.62, "grad_norm": 14.89112663269043, "learning_rate": 4.085448275862069e-06, "loss": 0.2311, "step": 63075 }, { "epoch": 0.62, "grad_norm": 21.7353572845459, "learning_rate": 4.085017241379311e-06, "loss": 0.4457, "step": 63100 }, { "epoch": 0.62, "grad_norm": 8.797821998596191, "learning_rate": 4.084586206896552e-06, "loss": 0.2687, "step": 63125 }, { "epoch": 0.62, "grad_norm": 20.863895416259766, "learning_rate": 4.084155172413794e-06, "loss": 0.3609, "step": 63150 }, { "epoch": 0.62, "grad_norm": 18.079391479492188, "learning_rate": 4.083724137931035e-06, "loss": 0.2113, "step": 63175 }, { "epoch": 0.62, "grad_norm": 22.065235137939453, "learning_rate": 4.0832931034482765e-06, "loss": 0.3861, "step": 63200 }, { "epoch": 0.62, "grad_norm": 11.997344970703125, "learning_rate": 4.082862068965517e-06, "loss": 0.2085, "step": 63225 }, { "epoch": 0.62, "grad_norm": 23.40819549560547, "learning_rate": 4.0824482758620694e-06, "loss": 0.4046, "step": 63250 }, { "epoch": 0.62, "grad_norm": 18.358675003051758, "learning_rate": 4.082017241379311e-06, "loss": 0.2404, "step": 63275 }, { "epoch": 0.62, "grad_norm": 12.178414344787598, "learning_rate": 4.081586206896552e-06, "loss": 0.3475, "step": 63300 }, { "epoch": 0.62, "grad_norm": 8.353638648986816, "learning_rate": 4.081155172413793e-06, "loss": 0.2086, "step": 63325 }, { "epoch": 0.62, "grad_norm": 19.32244873046875, "learning_rate": 4.080724137931034e-06, "loss": 0.4117, "step": 63350 }, { "epoch": 0.62, "grad_norm": 7.931299209594727, "learning_rate": 4.080293103448276e-06, "loss": 0.2315, "step": 63375 }, { "epoch": 0.62, "grad_norm": 12.33786392211914, "learning_rate": 4.079862068965517e-06, "loss": 0.3757, "step": 63400 }, { "epoch": 0.62, "grad_norm": 14.009076118469238, "learning_rate": 4.079431034482759e-06, "loss": 0.1821, "step": 63425 }, { "epoch": 0.62, "grad_norm": 16.48736000061035, "learning_rate": 4.079e-06, "loss": 0.355, "step": 63450 }, { "epoch": 0.62, "grad_norm": 9.845916748046875, "learning_rate": 4.078568965517242e-06, "loss": 0.2124, "step": 63475 }, { "epoch": 0.62, "grad_norm": 15.808370590209961, "learning_rate": 4.078137931034483e-06, "loss": 0.4499, "step": 63500 }, { "epoch": 0.62, "grad_norm": 20.358497619628906, "learning_rate": 4.077706896551725e-06, "loss": 0.2395, "step": 63525 }, { "epoch": 0.63, "grad_norm": 15.419340133666992, "learning_rate": 4.077275862068965e-06, "loss": 0.4158, "step": 63550 }, { "epoch": 0.63, "grad_norm": 5.333937644958496, "learning_rate": 4.0768448275862075e-06, "loss": 0.2631, "step": 63575 }, { "epoch": 0.63, "grad_norm": 16.079038619995117, "learning_rate": 4.076413793103449e-06, "loss": 0.3898, "step": 63600 }, { "epoch": 0.63, "grad_norm": 10.676823616027832, "learning_rate": 4.0759827586206904e-06, "loss": 0.2389, "step": 63625 }, { "epoch": 0.63, "grad_norm": 20.409666061401367, "learning_rate": 4.075551724137931e-06, "loss": 0.3796, "step": 63650 }, { "epoch": 0.63, "grad_norm": 8.26964282989502, "learning_rate": 4.0751206896551725e-06, "loss": 0.2465, "step": 63675 }, { "epoch": 0.63, "grad_norm": 15.801655769348145, "learning_rate": 4.074689655172414e-06, "loss": 0.356, "step": 63700 }, { "epoch": 0.63, "grad_norm": 8.147972106933594, "learning_rate": 4.074258620689655e-06, "loss": 0.1668, "step": 63725 }, { "epoch": 0.63, "grad_norm": 13.55654525756836, "learning_rate": 4.073827586206897e-06, "loss": 0.4088, "step": 63750 }, { "epoch": 0.63, "grad_norm": 6.6946187019348145, "learning_rate": 4.073396551724138e-06, "loss": 0.1791, "step": 63775 }, { "epoch": 0.63, "grad_norm": 29.950897216796875, "learning_rate": 4.07296551724138e-06, "loss": 0.3294, "step": 63800 }, { "epoch": 0.63, "grad_norm": 9.427214622497559, "learning_rate": 4.072534482758621e-06, "loss": 0.2028, "step": 63825 }, { "epoch": 0.63, "grad_norm": 16.284801483154297, "learning_rate": 4.072103448275863e-06, "loss": 0.402, "step": 63850 }, { "epoch": 0.63, "grad_norm": 9.118473052978516, "learning_rate": 4.071672413793103e-06, "loss": 0.1858, "step": 63875 }, { "epoch": 0.63, "grad_norm": 14.77173137664795, "learning_rate": 4.071241379310345e-06, "loss": 0.4537, "step": 63900 }, { "epoch": 0.63, "grad_norm": 10.556093215942383, "learning_rate": 4.070810344827586e-06, "loss": 0.2058, "step": 63925 }, { "epoch": 0.63, "grad_norm": 17.31320571899414, "learning_rate": 4.070379310344828e-06, "loss": 0.3998, "step": 63950 }, { "epoch": 0.63, "grad_norm": 9.60946273803711, "learning_rate": 4.069948275862069e-06, "loss": 0.194, "step": 63975 }, { "epoch": 0.63, "grad_norm": 16.053926467895508, "learning_rate": 4.069517241379311e-06, "loss": 0.4156, "step": 64000 }, { "epoch": 0.63, "grad_norm": 11.480051040649414, "learning_rate": 4.069086206896552e-06, "loss": 0.2385, "step": 64025 }, { "epoch": 0.63, "grad_norm": 16.913841247558594, "learning_rate": 4.0686551724137935e-06, "loss": 0.3868, "step": 64050 }, { "epoch": 0.63, "grad_norm": 9.998762130737305, "learning_rate": 4.068224137931035e-06, "loss": 0.2456, "step": 64075 }, { "epoch": 0.63, "grad_norm": 17.32744026184082, "learning_rate": 4.0677931034482756e-06, "loss": 0.357, "step": 64100 }, { "epoch": 0.63, "grad_norm": 11.749651908874512, "learning_rate": 4.067362068965517e-06, "loss": 0.2136, "step": 64125 }, { "epoch": 0.63, "grad_norm": 18.19765281677246, "learning_rate": 4.066931034482759e-06, "loss": 0.4169, "step": 64150 }, { "epoch": 0.63, "grad_norm": 15.3385009765625, "learning_rate": 4.066500000000001e-06, "loss": 0.2726, "step": 64175 }, { "epoch": 0.63, "grad_norm": 16.757761001586914, "learning_rate": 4.066068965517242e-06, "loss": 0.3591, "step": 64200 }, { "epoch": 0.63, "grad_norm": 5.552122592926025, "learning_rate": 4.065637931034483e-06, "loss": 0.2272, "step": 64225 }, { "epoch": 0.63, "grad_norm": 17.264738082885742, "learning_rate": 4.065206896551724e-06, "loss": 0.3803, "step": 64250 }, { "epoch": 0.63, "grad_norm": 12.15026569366455, "learning_rate": 4.064775862068966e-06, "loss": 0.2311, "step": 64275 }, { "epoch": 0.63, "grad_norm": 19.001693725585938, "learning_rate": 4.064344827586207e-06, "loss": 0.4607, "step": 64300 }, { "epoch": 0.63, "grad_norm": 17.42336654663086, "learning_rate": 4.063913793103449e-06, "loss": 0.2263, "step": 64325 }, { "epoch": 0.63, "grad_norm": 9.966620445251465, "learning_rate": 4.06348275862069e-06, "loss": 0.3449, "step": 64350 }, { "epoch": 0.63, "grad_norm": 5.535412311553955, "learning_rate": 4.063051724137932e-06, "loss": 0.2259, "step": 64375 }, { "epoch": 0.63, "grad_norm": 17.922462463378906, "learning_rate": 4.062620689655173e-06, "loss": 0.4251, "step": 64400 }, { "epoch": 0.63, "grad_norm": 9.339451789855957, "learning_rate": 4.0621896551724145e-06, "loss": 0.1923, "step": 64425 }, { "epoch": 0.63, "grad_norm": 16.903654098510742, "learning_rate": 4.061758620689655e-06, "loss": 0.381, "step": 64450 }, { "epoch": 0.63, "grad_norm": 7.001680374145508, "learning_rate": 4.0613275862068966e-06, "loss": 0.2322, "step": 64475 }, { "epoch": 0.63, "grad_norm": 14.817465782165527, "learning_rate": 4.060896551724138e-06, "loss": 0.3872, "step": 64500 }, { "epoch": 0.63, "grad_norm": 6.074315547943115, "learning_rate": 4.0604655172413795e-06, "loss": 0.262, "step": 64525 }, { "epoch": 0.63, "grad_norm": 16.864980697631836, "learning_rate": 4.060034482758621e-06, "loss": 0.3791, "step": 64550 }, { "epoch": 0.64, "grad_norm": 4.98783540725708, "learning_rate": 4.059603448275862e-06, "loss": 0.1813, "step": 64575 }, { "epoch": 0.64, "grad_norm": 15.475335121154785, "learning_rate": 4.059172413793104e-06, "loss": 0.3712, "step": 64600 }, { "epoch": 0.64, "grad_norm": 15.211700439453125, "learning_rate": 4.058741379310345e-06, "loss": 0.2297, "step": 64625 }, { "epoch": 0.64, "grad_norm": 23.99384117126465, "learning_rate": 4.058310344827587e-06, "loss": 0.3738, "step": 64650 }, { "epoch": 0.64, "grad_norm": 5.348606109619141, "learning_rate": 4.057879310344827e-06, "loss": 0.2195, "step": 64675 }, { "epoch": 0.64, "grad_norm": 11.724970817565918, "learning_rate": 4.057448275862069e-06, "loss": 0.3614, "step": 64700 }, { "epoch": 0.64, "grad_norm": 10.184335708618164, "learning_rate": 4.057017241379311e-06, "loss": 0.2122, "step": 64725 }, { "epoch": 0.64, "grad_norm": 19.262588500976562, "learning_rate": 4.056586206896553e-06, "loss": 0.363, "step": 64750 }, { "epoch": 0.64, "grad_norm": 14.02264404296875, "learning_rate": 4.056155172413793e-06, "loss": 0.1904, "step": 64775 }, { "epoch": 0.64, "grad_norm": 9.586224555969238, "learning_rate": 4.055724137931035e-06, "loss": 0.3814, "step": 64800 }, { "epoch": 0.64, "grad_norm": 9.694282531738281, "learning_rate": 4.055293103448276e-06, "loss": 0.2245, "step": 64825 }, { "epoch": 0.64, "grad_norm": 26.013158798217773, "learning_rate": 4.0548620689655176e-06, "loss": 0.4429, "step": 64850 }, { "epoch": 0.64, "grad_norm": 8.665509223937988, "learning_rate": 4.054431034482759e-06, "loss": 0.1778, "step": 64875 }, { "epoch": 0.64, "grad_norm": 18.20655059814453, "learning_rate": 4.0540000000000005e-06, "loss": 0.3279, "step": 64900 }, { "epoch": 0.64, "grad_norm": 10.202529907226562, "learning_rate": 4.053568965517242e-06, "loss": 0.185, "step": 64925 }, { "epoch": 0.64, "grad_norm": 19.0234375, "learning_rate": 4.053137931034483e-06, "loss": 0.3744, "step": 64950 }, { "epoch": 0.64, "grad_norm": 6.520525932312012, "learning_rate": 4.052706896551725e-06, "loss": 0.2408, "step": 64975 }, { "epoch": 0.64, "grad_norm": 19.116016387939453, "learning_rate": 4.0522758620689655e-06, "loss": 0.4131, "step": 65000 }, { "epoch": 0.64, "grad_norm": 8.282083511352539, "learning_rate": 4.051844827586207e-06, "loss": 0.2646, "step": 65025 }, { "epoch": 0.64, "grad_norm": 19.583110809326172, "learning_rate": 4.051413793103448e-06, "loss": 0.3542, "step": 65050 }, { "epoch": 0.64, "grad_norm": 13.830689430236816, "learning_rate": 4.05098275862069e-06, "loss": 0.2402, "step": 65075 }, { "epoch": 0.64, "grad_norm": 24.51988983154297, "learning_rate": 4.050551724137931e-06, "loss": 0.4197, "step": 65100 }, { "epoch": 0.64, "grad_norm": 9.122455596923828, "learning_rate": 4.050120689655173e-06, "loss": 0.2291, "step": 65125 }, { "epoch": 0.64, "grad_norm": 11.83525562286377, "learning_rate": 4.049689655172414e-06, "loss": 0.3371, "step": 65150 }, { "epoch": 0.64, "grad_norm": 9.745088577270508, "learning_rate": 4.049258620689656e-06, "loss": 0.1865, "step": 65175 }, { "epoch": 0.64, "grad_norm": 13.68925952911377, "learning_rate": 4.048827586206897e-06, "loss": 0.3644, "step": 65200 }, { "epoch": 0.64, "grad_norm": 17.35293960571289, "learning_rate": 4.048396551724138e-06, "loss": 0.2265, "step": 65225 }, { "epoch": 0.64, "grad_norm": 17.325294494628906, "learning_rate": 4.047965517241379e-06, "loss": 0.4172, "step": 65250 }, { "epoch": 0.64, "grad_norm": 8.233837127685547, "learning_rate": 4.047534482758621e-06, "loss": 0.201, "step": 65275 }, { "epoch": 0.64, "grad_norm": 23.34113883972168, "learning_rate": 4.047120689655173e-06, "loss": 0.3405, "step": 65300 }, { "epoch": 0.64, "grad_norm": 8.768641471862793, "learning_rate": 4.0466896551724136e-06, "loss": 0.2074, "step": 65325 }, { "epoch": 0.64, "grad_norm": 20.97933578491211, "learning_rate": 4.046258620689655e-06, "loss": 0.395, "step": 65350 }, { "epoch": 0.64, "grad_norm": 13.763354301452637, "learning_rate": 4.0458275862068965e-06, "loss": 0.2356, "step": 65375 }, { "epoch": 0.64, "grad_norm": 18.29585075378418, "learning_rate": 4.045396551724139e-06, "loss": 0.4076, "step": 65400 }, { "epoch": 0.64, "grad_norm": 15.880889892578125, "learning_rate": 4.04496551724138e-06, "loss": 0.241, "step": 65425 }, { "epoch": 0.64, "grad_norm": 16.691802978515625, "learning_rate": 4.044534482758621e-06, "loss": 0.3238, "step": 65450 }, { "epoch": 0.64, "grad_norm": 6.544596195220947, "learning_rate": 4.044103448275862e-06, "loss": 0.199, "step": 65475 }, { "epoch": 0.64, "grad_norm": 22.4457950592041, "learning_rate": 4.043672413793104e-06, "loss": 0.4453, "step": 65500 }, { "epoch": 0.64, "grad_norm": 8.278060913085938, "learning_rate": 4.043241379310345e-06, "loss": 0.1629, "step": 65525 }, { "epoch": 0.64, "grad_norm": 12.864997863769531, "learning_rate": 4.042810344827587e-06, "loss": 0.3489, "step": 65550 }, { "epoch": 0.64, "grad_norm": 14.322874069213867, "learning_rate": 4.042379310344828e-06, "loss": 0.2086, "step": 65575 }, { "epoch": 0.65, "grad_norm": 20.00027847290039, "learning_rate": 4.04194827586207e-06, "loss": 0.4419, "step": 65600 }, { "epoch": 0.65, "grad_norm": 8.653555870056152, "learning_rate": 4.041517241379311e-06, "loss": 0.2042, "step": 65625 }, { "epoch": 0.65, "grad_norm": 13.538630485534668, "learning_rate": 4.0410862068965525e-06, "loss": 0.3926, "step": 65650 }, { "epoch": 0.65, "grad_norm": 9.403098106384277, "learning_rate": 4.040655172413793e-06, "loss": 0.2058, "step": 65675 }, { "epoch": 0.65, "grad_norm": 11.491191864013672, "learning_rate": 4.0402241379310346e-06, "loss": 0.3647, "step": 65700 }, { "epoch": 0.65, "grad_norm": 7.897934436798096, "learning_rate": 4.039793103448276e-06, "loss": 0.1749, "step": 65725 }, { "epoch": 0.65, "grad_norm": 22.834714889526367, "learning_rate": 4.0393620689655175e-06, "loss": 0.367, "step": 65750 }, { "epoch": 0.65, "grad_norm": 7.700301170349121, "learning_rate": 4.038931034482759e-06, "loss": 0.2328, "step": 65775 }, { "epoch": 0.65, "grad_norm": 11.251810073852539, "learning_rate": 4.0385e-06, "loss": 0.4422, "step": 65800 }, { "epoch": 0.65, "grad_norm": 5.979805946350098, "learning_rate": 4.038068965517242e-06, "loss": 0.213, "step": 65825 }, { "epoch": 0.65, "grad_norm": 24.080509185791016, "learning_rate": 4.037637931034483e-06, "loss": 0.4027, "step": 65850 }, { "epoch": 0.65, "grad_norm": 7.777535438537598, "learning_rate": 4.037206896551725e-06, "loss": 0.1862, "step": 65875 }, { "epoch": 0.65, "grad_norm": 21.949691772460938, "learning_rate": 4.036775862068965e-06, "loss": 0.4689, "step": 65900 }, { "epoch": 0.65, "grad_norm": 11.946148872375488, "learning_rate": 4.036344827586207e-06, "loss": 0.2466, "step": 65925 }, { "epoch": 0.65, "grad_norm": 19.115314483642578, "learning_rate": 4.035913793103448e-06, "loss": 0.3718, "step": 65950 }, { "epoch": 0.65, "grad_norm": 10.546974182128906, "learning_rate": 4.035482758620691e-06, "loss": 0.1996, "step": 65975 }, { "epoch": 0.65, "grad_norm": 18.45782470703125, "learning_rate": 4.035051724137931e-06, "loss": 0.417, "step": 66000 }, { "epoch": 0.65, "grad_norm": 3.8930680751800537, "learning_rate": 4.034620689655173e-06, "loss": 0.2244, "step": 66025 }, { "epoch": 0.65, "grad_norm": 24.23253631591797, "learning_rate": 4.034189655172414e-06, "loss": 0.407, "step": 66050 }, { "epoch": 0.65, "grad_norm": 6.5493245124816895, "learning_rate": 4.0337586206896556e-06, "loss": 0.183, "step": 66075 }, { "epoch": 0.65, "grad_norm": 16.679819107055664, "learning_rate": 4.033327586206897e-06, "loss": 0.3776, "step": 66100 }, { "epoch": 0.65, "grad_norm": 15.770505905151367, "learning_rate": 4.0328965517241385e-06, "loss": 0.205, "step": 66125 }, { "epoch": 0.65, "grad_norm": 17.30156135559082, "learning_rate": 4.03246551724138e-06, "loss": 0.3751, "step": 66150 }, { "epoch": 0.65, "grad_norm": 6.591464519500732, "learning_rate": 4.032034482758621e-06, "loss": 0.2007, "step": 66175 }, { "epoch": 0.65, "grad_norm": 16.912878036499023, "learning_rate": 4.031603448275863e-06, "loss": 0.4104, "step": 66200 }, { "epoch": 0.65, "grad_norm": 9.793399810791016, "learning_rate": 4.0311724137931034e-06, "loss": 0.1903, "step": 66225 }, { "epoch": 0.65, "grad_norm": 19.61400032043457, "learning_rate": 4.030741379310345e-06, "loss": 0.4798, "step": 66250 }, { "epoch": 0.65, "grad_norm": 4.666830062866211, "learning_rate": 4.030310344827586e-06, "loss": 0.1719, "step": 66275 }, { "epoch": 0.65, "grad_norm": 12.73576545715332, "learning_rate": 4.029879310344828e-06, "loss": 0.3907, "step": 66300 }, { "epoch": 0.65, "grad_norm": 5.626204013824463, "learning_rate": 4.029448275862069e-06, "loss": 0.2263, "step": 66325 }, { "epoch": 0.65, "grad_norm": 28.68054962158203, "learning_rate": 4.029017241379311e-06, "loss": 0.4669, "step": 66350 }, { "epoch": 0.65, "grad_norm": 9.231110572814941, "learning_rate": 4.028586206896552e-06, "loss": 0.2572, "step": 66375 }, { "epoch": 0.65, "grad_norm": 15.214405059814453, "learning_rate": 4.028155172413794e-06, "loss": 0.359, "step": 66400 }, { "epoch": 0.65, "grad_norm": 11.193442344665527, "learning_rate": 4.027724137931035e-06, "loss": 0.2392, "step": 66425 }, { "epoch": 0.65, "grad_norm": 14.849010467529297, "learning_rate": 4.027293103448276e-06, "loss": 0.3528, "step": 66450 }, { "epoch": 0.65, "grad_norm": 12.072067260742188, "learning_rate": 4.026862068965517e-06, "loss": 0.283, "step": 66475 }, { "epoch": 0.65, "grad_norm": 12.204949378967285, "learning_rate": 4.026431034482759e-06, "loss": 0.4486, "step": 66500 }, { "epoch": 0.65, "grad_norm": 11.957564353942871, "learning_rate": 4.026e-06, "loss": 0.193, "step": 66525 }, { "epoch": 0.65, "grad_norm": 15.462628364562988, "learning_rate": 4.025568965517242e-06, "loss": 0.3643, "step": 66550 }, { "epoch": 0.65, "grad_norm": 7.964373588562012, "learning_rate": 4.025137931034483e-06, "loss": 0.2182, "step": 66575 }, { "epoch": 0.66, "grad_norm": 17.1630916595459, "learning_rate": 4.0247068965517244e-06, "loss": 0.4131, "step": 66600 }, { "epoch": 0.66, "grad_norm": 7.530055046081543, "learning_rate": 4.024275862068966e-06, "loss": 0.2014, "step": 66625 }, { "epoch": 0.66, "grad_norm": 15.557901382446289, "learning_rate": 4.023844827586207e-06, "loss": 0.3705, "step": 66650 }, { "epoch": 0.66, "grad_norm": 7.50545072555542, "learning_rate": 4.023413793103448e-06, "loss": 0.2175, "step": 66675 }, { "epoch": 0.66, "grad_norm": 19.119401931762695, "learning_rate": 4.02298275862069e-06, "loss": 0.3561, "step": 66700 }, { "epoch": 0.66, "grad_norm": 12.666786193847656, "learning_rate": 4.022551724137932e-06, "loss": 0.2015, "step": 66725 }, { "epoch": 0.66, "grad_norm": 24.745567321777344, "learning_rate": 4.022120689655173e-06, "loss": 0.3856, "step": 66750 }, { "epoch": 0.66, "grad_norm": 6.077108860015869, "learning_rate": 4.021689655172415e-06, "loss": 0.1972, "step": 66775 }, { "epoch": 0.66, "grad_norm": 18.981931686401367, "learning_rate": 4.021258620689655e-06, "loss": 0.3888, "step": 66800 }, { "epoch": 0.66, "grad_norm": 12.638920783996582, "learning_rate": 4.020827586206897e-06, "loss": 0.2029, "step": 66825 }, { "epoch": 0.66, "grad_norm": 18.40876579284668, "learning_rate": 4.020396551724138e-06, "loss": 0.383, "step": 66850 }, { "epoch": 0.66, "grad_norm": 15.962498664855957, "learning_rate": 4.01996551724138e-06, "loss": 0.2629, "step": 66875 }, { "epoch": 0.66, "grad_norm": 21.385456085205078, "learning_rate": 4.019534482758621e-06, "loss": 0.3575, "step": 66900 }, { "epoch": 0.66, "grad_norm": 8.983206748962402, "learning_rate": 4.0191034482758625e-06, "loss": 0.2321, "step": 66925 }, { "epoch": 0.66, "grad_norm": 15.377567291259766, "learning_rate": 4.018672413793104e-06, "loss": 0.3622, "step": 66950 }, { "epoch": 0.66, "grad_norm": 15.168397903442383, "learning_rate": 4.0182413793103454e-06, "loss": 0.2898, "step": 66975 }, { "epoch": 0.66, "grad_norm": 16.538976669311523, "learning_rate": 4.017810344827587e-06, "loss": 0.4362, "step": 67000 }, { "epoch": 0.66, "grad_norm": 10.900341987609863, "learning_rate": 4.0173793103448275e-06, "loss": 0.2331, "step": 67025 }, { "epoch": 0.66, "grad_norm": 23.14933967590332, "learning_rate": 4.016948275862069e-06, "loss": 0.3305, "step": 67050 }, { "epoch": 0.66, "grad_norm": 12.91173267364502, "learning_rate": 4.0165172413793104e-06, "loss": 0.2099, "step": 67075 }, { "epoch": 0.66, "grad_norm": 12.956082344055176, "learning_rate": 4.016086206896552e-06, "loss": 0.3809, "step": 67100 }, { "epoch": 0.66, "grad_norm": 9.460631370544434, "learning_rate": 4.015655172413793e-06, "loss": 0.2103, "step": 67125 }, { "epoch": 0.66, "grad_norm": 21.512287139892578, "learning_rate": 4.015224137931035e-06, "loss": 0.4351, "step": 67150 }, { "epoch": 0.66, "grad_norm": 5.757533073425293, "learning_rate": 4.014793103448276e-06, "loss": 0.2326, "step": 67175 }, { "epoch": 0.66, "grad_norm": 20.601377487182617, "learning_rate": 4.014362068965518e-06, "loss": 0.4582, "step": 67200 }, { "epoch": 0.66, "grad_norm": 8.254968643188477, "learning_rate": 4.013931034482759e-06, "loss": 0.2103, "step": 67225 }, { "epoch": 0.66, "grad_norm": 13.52637004852295, "learning_rate": 4.0135e-06, "loss": 0.4523, "step": 67250 }, { "epoch": 0.66, "grad_norm": 9.696907043457031, "learning_rate": 4.013068965517242e-06, "loss": 0.2236, "step": 67275 }, { "epoch": 0.66, "grad_norm": 26.439088821411133, "learning_rate": 4.0126379310344835e-06, "loss": 0.4308, "step": 67300 }, { "epoch": 0.66, "grad_norm": 5.787468433380127, "learning_rate": 4.012206896551725e-06, "loss": 0.194, "step": 67325 }, { "epoch": 0.66, "grad_norm": 14.661358833312988, "learning_rate": 4.011793103448276e-06, "loss": 0.3611, "step": 67350 }, { "epoch": 0.66, "grad_norm": 13.6917142868042, "learning_rate": 4.011362068965518e-06, "loss": 0.2, "step": 67375 }, { "epoch": 0.66, "grad_norm": 21.33005714416504, "learning_rate": 4.010931034482759e-06, "loss": 0.3622, "step": 67400 }, { "epoch": 0.66, "grad_norm": 10.532792091369629, "learning_rate": 4.010500000000001e-06, "loss": 0.2255, "step": 67425 }, { "epoch": 0.66, "grad_norm": 21.3338565826416, "learning_rate": 4.0100689655172414e-06, "loss": 0.3894, "step": 67450 }, { "epoch": 0.66, "grad_norm": 11.021065711975098, "learning_rate": 4.009637931034483e-06, "loss": 0.2411, "step": 67475 }, { "epoch": 0.66, "grad_norm": 15.969390869140625, "learning_rate": 4.009206896551724e-06, "loss": 0.3633, "step": 67500 }, { "epoch": 0.66, "grad_norm": 20.908126831054688, "learning_rate": 4.008775862068966e-06, "loss": 0.2207, "step": 67525 }, { "epoch": 0.66, "grad_norm": 22.421911239624023, "learning_rate": 4.008344827586207e-06, "loss": 0.3752, "step": 67550 }, { "epoch": 0.66, "grad_norm": 9.008833885192871, "learning_rate": 4.007913793103449e-06, "loss": 0.2256, "step": 67575 }, { "epoch": 0.66, "grad_norm": 18.73096466064453, "learning_rate": 4.00748275862069e-06, "loss": 0.3888, "step": 67600 }, { "epoch": 0.67, "grad_norm": 8.09022331237793, "learning_rate": 4.007051724137932e-06, "loss": 0.2336, "step": 67625 }, { "epoch": 0.67, "grad_norm": 19.34869956970215, "learning_rate": 4.006620689655173e-06, "loss": 0.4567, "step": 67650 }, { "epoch": 0.67, "grad_norm": 15.249298095703125, "learning_rate": 4.006189655172414e-06, "loss": 0.1892, "step": 67675 }, { "epoch": 0.67, "grad_norm": 15.270783424377441, "learning_rate": 4.005758620689655e-06, "loss": 0.3773, "step": 67700 }, { "epoch": 0.67, "grad_norm": 8.815509796142578, "learning_rate": 4.005327586206897e-06, "loss": 0.2107, "step": 67725 }, { "epoch": 0.67, "grad_norm": 23.624961853027344, "learning_rate": 4.004896551724138e-06, "loss": 0.3772, "step": 67750 }, { "epoch": 0.67, "grad_norm": 10.003609657287598, "learning_rate": 4.0044655172413795e-06, "loss": 0.2345, "step": 67775 }, { "epoch": 0.67, "grad_norm": 16.728858947753906, "learning_rate": 4.004034482758621e-06, "loss": 0.4777, "step": 67800 }, { "epoch": 0.67, "grad_norm": 12.638517379760742, "learning_rate": 4.0036034482758624e-06, "loss": 0.2153, "step": 67825 }, { "epoch": 0.67, "grad_norm": 18.552831649780273, "learning_rate": 4.003172413793104e-06, "loss": 0.3885, "step": 67850 }, { "epoch": 0.67, "grad_norm": 7.283578395843506, "learning_rate": 4.002741379310345e-06, "loss": 0.2163, "step": 67875 }, { "epoch": 0.67, "grad_norm": 22.482999801635742, "learning_rate": 4.002310344827586e-06, "loss": 0.3945, "step": 67900 }, { "epoch": 0.67, "grad_norm": 8.6344633102417, "learning_rate": 4.001879310344827e-06, "loss": 0.2142, "step": 67925 }, { "epoch": 0.67, "grad_norm": 13.550893783569336, "learning_rate": 4.00144827586207e-06, "loss": 0.4509, "step": 67950 }, { "epoch": 0.67, "grad_norm": 8.022250175476074, "learning_rate": 4.001017241379311e-06, "loss": 0.2583, "step": 67975 }, { "epoch": 0.67, "grad_norm": 17.996315002441406, "learning_rate": 4.000586206896553e-06, "loss": 0.4268, "step": 68000 }, { "epoch": 0.67, "grad_norm": 7.413865566253662, "learning_rate": 4.000155172413793e-06, "loss": 0.2279, "step": 68025 }, { "epoch": 0.67, "grad_norm": 19.140933990478516, "learning_rate": 3.999724137931035e-06, "loss": 0.3345, "step": 68050 }, { "epoch": 0.67, "grad_norm": 9.992202758789062, "learning_rate": 3.999293103448276e-06, "loss": 0.1939, "step": 68075 }, { "epoch": 0.67, "grad_norm": 17.5815486907959, "learning_rate": 3.998862068965518e-06, "loss": 0.3836, "step": 68100 }, { "epoch": 0.67, "grad_norm": 7.512156963348389, "learning_rate": 3.998431034482759e-06, "loss": 0.2231, "step": 68125 }, { "epoch": 0.67, "grad_norm": 15.712413787841797, "learning_rate": 3.9980000000000005e-06, "loss": 0.4285, "step": 68150 }, { "epoch": 0.67, "grad_norm": 8.352758407592773, "learning_rate": 3.997568965517242e-06, "loss": 0.2205, "step": 68175 }, { "epoch": 0.67, "grad_norm": 14.996459007263184, "learning_rate": 3.9971379310344834e-06, "loss": 0.4006, "step": 68200 }, { "epoch": 0.67, "grad_norm": 10.544976234436035, "learning_rate": 3.996706896551725e-06, "loss": 0.2101, "step": 68225 }, { "epoch": 0.67, "grad_norm": 13.95573616027832, "learning_rate": 3.9962758620689655e-06, "loss": 0.3848, "step": 68250 }, { "epoch": 0.67, "grad_norm": 9.249225616455078, "learning_rate": 3.995844827586207e-06, "loss": 0.2144, "step": 68275 }, { "epoch": 0.67, "grad_norm": 14.08360481262207, "learning_rate": 3.995413793103448e-06, "loss": 0.3804, "step": 68300 }, { "epoch": 0.67, "grad_norm": 5.006259918212891, "learning_rate": 3.99498275862069e-06, "loss": 0.2489, "step": 68325 }, { "epoch": 0.67, "grad_norm": 14.916655540466309, "learning_rate": 3.994551724137931e-06, "loss": 0.457, "step": 68350 }, { "epoch": 0.67, "grad_norm": 9.149698257446289, "learning_rate": 3.994120689655173e-06, "loss": 0.217, "step": 68375 }, { "epoch": 0.67, "grad_norm": 14.689435958862305, "learning_rate": 3.993689655172414e-06, "loss": 0.3678, "step": 68400 }, { "epoch": 0.67, "grad_norm": 14.54647159576416, "learning_rate": 3.993258620689656e-06, "loss": 0.1891, "step": 68425 }, { "epoch": 0.67, "grad_norm": 21.976367950439453, "learning_rate": 3.992827586206897e-06, "loss": 0.3127, "step": 68450 }, { "epoch": 0.67, "grad_norm": 8.039214134216309, "learning_rate": 3.992396551724138e-06, "loss": 0.2849, "step": 68475 }, { "epoch": 0.67, "grad_norm": 17.02557373046875, "learning_rate": 3.991965517241379e-06, "loss": 0.4749, "step": 68500 }, { "epoch": 0.67, "grad_norm": 16.03524398803711, "learning_rate": 3.9915344827586215e-06, "loss": 0.2229, "step": 68525 }, { "epoch": 0.67, "grad_norm": 13.105344772338867, "learning_rate": 3.991103448275863e-06, "loss": 0.3582, "step": 68550 }, { "epoch": 0.67, "grad_norm": 11.597997665405273, "learning_rate": 3.990672413793104e-06, "loss": 0.2135, "step": 68575 }, { "epoch": 0.67, "grad_norm": 15.979255676269531, "learning_rate": 3.990241379310345e-06, "loss": 0.3776, "step": 68600 }, { "epoch": 0.67, "grad_norm": 5.561490058898926, "learning_rate": 3.9898103448275865e-06, "loss": 0.2042, "step": 68625 }, { "epoch": 0.68, "grad_norm": 19.816186904907227, "learning_rate": 3.989379310344828e-06, "loss": 0.4018, "step": 68650 }, { "epoch": 0.68, "grad_norm": 14.290122032165527, "learning_rate": 3.988948275862069e-06, "loss": 0.2338, "step": 68675 }, { "epoch": 0.68, "grad_norm": 21.977018356323242, "learning_rate": 3.988517241379311e-06, "loss": 0.4436, "step": 68700 }, { "epoch": 0.68, "grad_norm": 10.629137992858887, "learning_rate": 3.988086206896552e-06, "loss": 0.2018, "step": 68725 }, { "epoch": 0.68, "grad_norm": 13.908961296081543, "learning_rate": 3.987655172413794e-06, "loss": 0.3706, "step": 68750 }, { "epoch": 0.68, "grad_norm": 16.49909210205078, "learning_rate": 3.987224137931035e-06, "loss": 0.213, "step": 68775 }, { "epoch": 0.68, "grad_norm": 22.114652633666992, "learning_rate": 3.986793103448276e-06, "loss": 0.3924, "step": 68800 }, { "epoch": 0.68, "grad_norm": 8.899078369140625, "learning_rate": 3.986362068965517e-06, "loss": 0.2604, "step": 68825 }, { "epoch": 0.68, "grad_norm": 13.003636360168457, "learning_rate": 3.985931034482759e-06, "loss": 0.4282, "step": 68850 }, { "epoch": 0.68, "grad_norm": 11.289175987243652, "learning_rate": 3.9855e-06, "loss": 0.2186, "step": 68875 }, { "epoch": 0.68, "grad_norm": 16.313329696655273, "learning_rate": 3.985068965517242e-06, "loss": 0.3981, "step": 68900 }, { "epoch": 0.68, "grad_norm": 14.964376449584961, "learning_rate": 3.984637931034483e-06, "loss": 0.2458, "step": 68925 }, { "epoch": 0.68, "grad_norm": 20.09209442138672, "learning_rate": 3.984206896551725e-06, "loss": 0.4411, "step": 68950 }, { "epoch": 0.68, "grad_norm": 15.408573150634766, "learning_rate": 3.983775862068966e-06, "loss": 0.2498, "step": 68975 }, { "epoch": 0.68, "grad_norm": 18.160367965698242, "learning_rate": 3.9833448275862075e-06, "loss": 0.4033, "step": 69000 }, { "epoch": 0.68, "grad_norm": 7.461757183074951, "learning_rate": 3.982913793103448e-06, "loss": 0.2027, "step": 69025 }, { "epoch": 0.68, "grad_norm": 22.218923568725586, "learning_rate": 3.9824827586206896e-06, "loss": 0.4133, "step": 69050 }, { "epoch": 0.68, "grad_norm": 14.111663818359375, "learning_rate": 3.982051724137931e-06, "loss": 0.2441, "step": 69075 }, { "epoch": 0.68, "grad_norm": 22.920166015625, "learning_rate": 3.981620689655173e-06, "loss": 0.3594, "step": 69100 }, { "epoch": 0.68, "grad_norm": 10.106644630432129, "learning_rate": 3.981189655172414e-06, "loss": 0.239, "step": 69125 }, { "epoch": 0.68, "grad_norm": 19.079648971557617, "learning_rate": 3.980758620689655e-06, "loss": 0.3487, "step": 69150 }, { "epoch": 0.68, "grad_norm": 10.841383934020996, "learning_rate": 3.980327586206897e-06, "loss": 0.2309, "step": 69175 }, { "epoch": 0.68, "grad_norm": 19.757190704345703, "learning_rate": 3.979896551724138e-06, "loss": 0.3615, "step": 69200 }, { "epoch": 0.68, "grad_norm": 7.391101360321045, "learning_rate": 3.97946551724138e-06, "loss": 0.2327, "step": 69225 }, { "epoch": 0.68, "grad_norm": 31.32239532470703, "learning_rate": 3.979034482758621e-06, "loss": 0.4105, "step": 69250 }, { "epoch": 0.68, "grad_norm": 10.052584648132324, "learning_rate": 3.978603448275863e-06, "loss": 0.2402, "step": 69275 }, { "epoch": 0.68, "grad_norm": 20.92659568786621, "learning_rate": 3.978172413793104e-06, "loss": 0.3526, "step": 69300 }, { "epoch": 0.68, "grad_norm": 6.876237869262695, "learning_rate": 3.977741379310346e-06, "loss": 0.2118, "step": 69325 }, { "epoch": 0.68, "grad_norm": 16.193450927734375, "learning_rate": 3.977310344827586e-06, "loss": 0.3863, "step": 69350 }, { "epoch": 0.68, "grad_norm": 12.574960708618164, "learning_rate": 3.976879310344828e-06, "loss": 0.2575, "step": 69375 }, { "epoch": 0.68, "grad_norm": 18.675647735595703, "learning_rate": 3.976448275862069e-06, "loss": 0.3571, "step": 69400 }, { "epoch": 0.68, "grad_norm": 6.193560600280762, "learning_rate": 3.9760172413793106e-06, "loss": 0.2314, "step": 69425 }, { "epoch": 0.68, "grad_norm": 18.757970809936523, "learning_rate": 3.975603448275863e-06, "loss": 0.4174, "step": 69450 }, { "epoch": 0.68, "grad_norm": 9.642799377441406, "learning_rate": 3.9751724137931035e-06, "loss": 0.2403, "step": 69475 }, { "epoch": 0.68, "grad_norm": 16.853694915771484, "learning_rate": 3.974741379310345e-06, "loss": 0.399, "step": 69500 }, { "epoch": 0.68, "grad_norm": 6.556696891784668, "learning_rate": 3.974310344827586e-06, "loss": 0.2231, "step": 69525 }, { "epoch": 0.68, "grad_norm": 14.212970733642578, "learning_rate": 3.973879310344828e-06, "loss": 0.3299, "step": 69550 }, { "epoch": 0.68, "grad_norm": 15.7498197555542, "learning_rate": 3.973448275862069e-06, "loss": 0.2058, "step": 69575 }, { "epoch": 0.68, "grad_norm": 18.960235595703125, "learning_rate": 3.973017241379311e-06, "loss": 0.452, "step": 69600 }, { "epoch": 0.68, "grad_norm": 19.204851150512695, "learning_rate": 3.972586206896552e-06, "loss": 0.2145, "step": 69625 }, { "epoch": 0.69, "grad_norm": 19.954652786254883, "learning_rate": 3.972155172413794e-06, "loss": 0.3348, "step": 69650 }, { "epoch": 0.69, "grad_norm": 12.436634063720703, "learning_rate": 3.971724137931035e-06, "loss": 0.2121, "step": 69675 }, { "epoch": 0.69, "grad_norm": 20.326343536376953, "learning_rate": 3.971293103448276e-06, "loss": 0.3478, "step": 69700 }, { "epoch": 0.69, "grad_norm": 9.818232536315918, "learning_rate": 3.970862068965517e-06, "loss": 0.2205, "step": 69725 }, { "epoch": 0.69, "grad_norm": 15.705535888671875, "learning_rate": 3.970431034482759e-06, "loss": 0.4423, "step": 69750 }, { "epoch": 0.69, "grad_norm": 6.407658576965332, "learning_rate": 3.97e-06, "loss": 0.2635, "step": 69775 }, { "epoch": 0.69, "grad_norm": 17.847759246826172, "learning_rate": 3.9695689655172416e-06, "loss": 0.3842, "step": 69800 }, { "epoch": 0.69, "grad_norm": 5.202850341796875, "learning_rate": 3.969137931034483e-06, "loss": 0.2003, "step": 69825 }, { "epoch": 0.69, "grad_norm": 28.039377212524414, "learning_rate": 3.9687068965517245e-06, "loss": 0.3502, "step": 69850 }, { "epoch": 0.69, "grad_norm": 6.5532331466674805, "learning_rate": 3.968275862068966e-06, "loss": 0.1817, "step": 69875 }, { "epoch": 0.69, "grad_norm": 15.639516830444336, "learning_rate": 3.967844827586207e-06, "loss": 0.413, "step": 69900 }, { "epoch": 0.69, "grad_norm": 8.953516960144043, "learning_rate": 3.967413793103449e-06, "loss": 0.1898, "step": 69925 }, { "epoch": 0.69, "grad_norm": 15.974419593811035, "learning_rate": 3.96698275862069e-06, "loss": 0.3943, "step": 69950 }, { "epoch": 0.69, "grad_norm": 13.579646110534668, "learning_rate": 3.966551724137932e-06, "loss": 0.1971, "step": 69975 }, { "epoch": 0.69, "grad_norm": 13.358168601989746, "learning_rate": 3.966120689655173e-06, "loss": 0.3901, "step": 70000 }, { "epoch": 0.69, "grad_norm": 14.244304656982422, "learning_rate": 3.965689655172414e-06, "loss": 0.2334, "step": 70025 }, { "epoch": 0.69, "grad_norm": 22.0170841217041, "learning_rate": 3.965258620689655e-06, "loss": 0.4442, "step": 70050 }, { "epoch": 0.69, "grad_norm": 6.8966779708862305, "learning_rate": 3.964827586206897e-06, "loss": 0.2828, "step": 70075 }, { "epoch": 0.69, "grad_norm": 16.990493774414062, "learning_rate": 3.964396551724138e-06, "loss": 0.3617, "step": 70100 }, { "epoch": 0.69, "grad_norm": 11.636064529418945, "learning_rate": 3.96396551724138e-06, "loss": 0.2148, "step": 70125 }, { "epoch": 0.69, "grad_norm": 17.933467864990234, "learning_rate": 3.963534482758621e-06, "loss": 0.4786, "step": 70150 }, { "epoch": 0.69, "grad_norm": 13.022367477416992, "learning_rate": 3.9631034482758626e-06, "loss": 0.2498, "step": 70175 }, { "epoch": 0.69, "grad_norm": 22.408823013305664, "learning_rate": 3.962672413793104e-06, "loss": 0.4382, "step": 70200 }, { "epoch": 0.69, "grad_norm": 6.318054676055908, "learning_rate": 3.9622413793103455e-06, "loss": 0.1669, "step": 70225 }, { "epoch": 0.69, "grad_norm": 19.1153507232666, "learning_rate": 3.961810344827586e-06, "loss": 0.4063, "step": 70250 }, { "epoch": 0.69, "grad_norm": 3.046915054321289, "learning_rate": 3.9613793103448275e-06, "loss": 0.1895, "step": 70275 }, { "epoch": 0.69, "grad_norm": 17.598865509033203, "learning_rate": 3.960948275862069e-06, "loss": 0.4365, "step": 70300 }, { "epoch": 0.69, "grad_norm": 10.935037612915039, "learning_rate": 3.9605172413793105e-06, "loss": 0.2173, "step": 70325 }, { "epoch": 0.69, "grad_norm": 12.116410255432129, "learning_rate": 3.960086206896552e-06, "loss": 0.3741, "step": 70350 }, { "epoch": 0.69, "grad_norm": 6.479507923126221, "learning_rate": 3.959655172413793e-06, "loss": 0.2008, "step": 70375 }, { "epoch": 0.69, "grad_norm": 7.806881904602051, "learning_rate": 3.959224137931035e-06, "loss": 0.3069, "step": 70400 }, { "epoch": 0.69, "grad_norm": 9.4569091796875, "learning_rate": 3.958793103448276e-06, "loss": 0.26, "step": 70425 }, { "epoch": 0.69, "grad_norm": 1.9533863067626953, "learning_rate": 3.958362068965518e-06, "loss": 0.3187, "step": 70450 }, { "epoch": 0.69, "grad_norm": 9.309505462646484, "learning_rate": 3.957931034482758e-06, "loss": 0.2561, "step": 70475 }, { "epoch": 0.69, "grad_norm": 18.493520736694336, "learning_rate": 3.957500000000001e-06, "loss": 0.4469, "step": 70500 }, { "epoch": 0.69, "grad_norm": 8.373488426208496, "learning_rate": 3.957068965517242e-06, "loss": 0.1721, "step": 70525 }, { "epoch": 0.69, "grad_norm": 17.580856323242188, "learning_rate": 3.9566379310344836e-06, "loss": 0.4114, "step": 70550 }, { "epoch": 0.69, "grad_norm": 9.333883285522461, "learning_rate": 3.956206896551724e-06, "loss": 0.2367, "step": 70575 }, { "epoch": 0.69, "grad_norm": 20.862205505371094, "learning_rate": 3.955775862068966e-06, "loss": 0.3829, "step": 70600 }, { "epoch": 0.69, "grad_norm": 7.079866409301758, "learning_rate": 3.955344827586207e-06, "loss": 0.1576, "step": 70625 }, { "epoch": 0.69, "grad_norm": 18.420305252075195, "learning_rate": 3.9549137931034485e-06, "loss": 0.3311, "step": 70650 }, { "epoch": 0.7, "grad_norm": 13.099349021911621, "learning_rate": 3.95448275862069e-06, "loss": 0.2319, "step": 70675 }, { "epoch": 0.7, "grad_norm": 23.57392120361328, "learning_rate": 3.9540517241379315e-06, "loss": 0.4303, "step": 70700 }, { "epoch": 0.7, "grad_norm": 9.370461463928223, "learning_rate": 3.953620689655173e-06, "loss": 0.2646, "step": 70725 }, { "epoch": 0.7, "grad_norm": 24.365234375, "learning_rate": 3.953189655172414e-06, "loss": 0.4449, "step": 70750 }, { "epoch": 0.7, "grad_norm": 12.928755760192871, "learning_rate": 3.952758620689656e-06, "loss": 0.2335, "step": 70775 }, { "epoch": 0.7, "grad_norm": 17.429410934448242, "learning_rate": 3.9523275862068964e-06, "loss": 0.3981, "step": 70800 }, { "epoch": 0.7, "grad_norm": 16.713397979736328, "learning_rate": 3.951896551724138e-06, "loss": 0.2627, "step": 70825 }, { "epoch": 0.7, "grad_norm": 19.19354248046875, "learning_rate": 3.951465517241379e-06, "loss": 0.4357, "step": 70850 }, { "epoch": 0.7, "grad_norm": 7.621283054351807, "learning_rate": 3.951034482758621e-06, "loss": 0.2216, "step": 70875 }, { "epoch": 0.7, "grad_norm": 15.320019721984863, "learning_rate": 3.950603448275862e-06, "loss": 0.3541, "step": 70900 }, { "epoch": 0.7, "grad_norm": 13.873743057250977, "learning_rate": 3.950172413793104e-06, "loss": 0.2686, "step": 70925 }, { "epoch": 0.7, "grad_norm": 9.16375732421875, "learning_rate": 3.949741379310345e-06, "loss": 0.3577, "step": 70950 }, { "epoch": 0.7, "grad_norm": 8.487844467163086, "learning_rate": 3.949310344827587e-06, "loss": 0.2319, "step": 70975 }, { "epoch": 0.7, "grad_norm": 14.795717239379883, "learning_rate": 3.948879310344828e-06, "loss": 0.3572, "step": 71000 }, { "epoch": 0.7, "grad_norm": 5.915810585021973, "learning_rate": 3.948448275862069e-06, "loss": 0.2057, "step": 71025 }, { "epoch": 0.7, "grad_norm": 24.432071685791016, "learning_rate": 3.94801724137931e-06, "loss": 0.4153, "step": 71050 }, { "epoch": 0.7, "grad_norm": 4.124364852905273, "learning_rate": 3.9475862068965525e-06, "loss": 0.2146, "step": 71075 }, { "epoch": 0.7, "grad_norm": 19.294666290283203, "learning_rate": 3.947155172413794e-06, "loss": 0.4058, "step": 71100 }, { "epoch": 0.7, "grad_norm": 10.127120018005371, "learning_rate": 3.946724137931035e-06, "loss": 0.2659, "step": 71125 }, { "epoch": 0.7, "grad_norm": 19.042888641357422, "learning_rate": 3.946293103448276e-06, "loss": 0.3941, "step": 71150 }, { "epoch": 0.7, "grad_norm": 6.0304670333862305, "learning_rate": 3.9458620689655174e-06, "loss": 0.2212, "step": 71175 }, { "epoch": 0.7, "grad_norm": 16.7783145904541, "learning_rate": 3.945431034482759e-06, "loss": 0.4066, "step": 71200 }, { "epoch": 0.7, "grad_norm": 8.965187072753906, "learning_rate": 3.945e-06, "loss": 0.2063, "step": 71225 }, { "epoch": 0.7, "grad_norm": 16.196420669555664, "learning_rate": 3.944568965517242e-06, "loss": 0.4443, "step": 71250 }, { "epoch": 0.7, "grad_norm": 13.01553726196289, "learning_rate": 3.944137931034483e-06, "loss": 0.2196, "step": 71275 }, { "epoch": 0.7, "grad_norm": 25.11321449279785, "learning_rate": 3.943706896551725e-06, "loss": 0.4718, "step": 71300 }, { "epoch": 0.7, "grad_norm": 7.770145893096924, "learning_rate": 3.943275862068966e-06, "loss": 0.1889, "step": 71325 }, { "epoch": 0.7, "grad_norm": 15.51905632019043, "learning_rate": 3.942844827586208e-06, "loss": 0.4348, "step": 71350 }, { "epoch": 0.7, "grad_norm": 8.6836576461792, "learning_rate": 3.942413793103448e-06, "loss": 0.2191, "step": 71375 }, { "epoch": 0.7, "grad_norm": 27.74491310119629, "learning_rate": 3.94198275862069e-06, "loss": 0.4089, "step": 71400 }, { "epoch": 0.7, "grad_norm": 8.915651321411133, "learning_rate": 3.941551724137931e-06, "loss": 0.2629, "step": 71425 }, { "epoch": 0.7, "grad_norm": 12.792689323425293, "learning_rate": 3.941120689655173e-06, "loss": 0.3331, "step": 71450 }, { "epoch": 0.7, "grad_norm": 6.134634494781494, "learning_rate": 3.940689655172414e-06, "loss": 0.2084, "step": 71475 }, { "epoch": 0.7, "grad_norm": 21.56051254272461, "learning_rate": 3.9402586206896555e-06, "loss": 0.4005, "step": 71500 }, { "epoch": 0.7, "grad_norm": 5.227102279663086, "learning_rate": 3.939827586206897e-06, "loss": 0.2459, "step": 71525 }, { "epoch": 0.7, "grad_norm": 13.174249649047852, "learning_rate": 3.9393965517241384e-06, "loss": 0.3847, "step": 71550 }, { "epoch": 0.7, "grad_norm": 6.040078639984131, "learning_rate": 3.93896551724138e-06, "loss": 0.192, "step": 71575 }, { "epoch": 0.7, "grad_norm": 11.334155082702637, "learning_rate": 3.938551724137931e-06, "loss": 0.4077, "step": 71600 }, { "epoch": 0.7, "grad_norm": 8.231188774108887, "learning_rate": 3.938120689655173e-06, "loss": 0.195, "step": 71625 }, { "epoch": 0.7, "grad_norm": 20.96892738342285, "learning_rate": 3.937689655172414e-06, "loss": 0.3868, "step": 71650 }, { "epoch": 0.7, "grad_norm": 14.552698135375977, "learning_rate": 3.937258620689656e-06, "loss": 0.2404, "step": 71675 }, { "epoch": 0.71, "grad_norm": 11.768024444580078, "learning_rate": 3.936827586206896e-06, "loss": 0.3087, "step": 71700 }, { "epoch": 0.71, "grad_norm": 11.426313400268555, "learning_rate": 3.936396551724138e-06, "loss": 0.2236, "step": 71725 }, { "epoch": 0.71, "grad_norm": 19.687759399414062, "learning_rate": 3.935965517241379e-06, "loss": 0.3528, "step": 71750 }, { "epoch": 0.71, "grad_norm": 6.455165386199951, "learning_rate": 3.9355344827586215e-06, "loss": 0.2533, "step": 71775 }, { "epoch": 0.71, "grad_norm": 23.685083389282227, "learning_rate": 3.935103448275863e-06, "loss": 0.4001, "step": 71800 }, { "epoch": 0.71, "grad_norm": 11.412861824035645, "learning_rate": 3.934672413793104e-06, "loss": 0.1974, "step": 71825 }, { "epoch": 0.71, "grad_norm": 22.060649871826172, "learning_rate": 3.934241379310345e-06, "loss": 0.328, "step": 71850 }, { "epoch": 0.71, "grad_norm": 7.994454383850098, "learning_rate": 3.9338103448275865e-06, "loss": 0.2077, "step": 71875 }, { "epoch": 0.71, "grad_norm": 14.950668334960938, "learning_rate": 3.933379310344828e-06, "loss": 0.3305, "step": 71900 }, { "epoch": 0.71, "grad_norm": 7.718119144439697, "learning_rate": 3.9329482758620694e-06, "loss": 0.209, "step": 71925 }, { "epoch": 0.71, "grad_norm": 20.593564987182617, "learning_rate": 3.932517241379311e-06, "loss": 0.4271, "step": 71950 }, { "epoch": 0.71, "grad_norm": 8.462926864624023, "learning_rate": 3.932086206896552e-06, "loss": 0.2052, "step": 71975 }, { "epoch": 0.71, "grad_norm": 13.066388130187988, "learning_rate": 3.931655172413794e-06, "loss": 0.4297, "step": 72000 }, { "epoch": 0.71, "grad_norm": 10.036721229553223, "learning_rate": 3.931224137931035e-06, "loss": 0.2387, "step": 72025 }, { "epoch": 0.71, "grad_norm": 20.076406478881836, "learning_rate": 3.930793103448276e-06, "loss": 0.3813, "step": 72050 }, { "epoch": 0.71, "grad_norm": 11.5769681930542, "learning_rate": 3.930362068965517e-06, "loss": 0.241, "step": 72075 }, { "epoch": 0.71, "grad_norm": 15.111163139343262, "learning_rate": 3.929931034482759e-06, "loss": 0.43, "step": 72100 }, { "epoch": 0.71, "grad_norm": 5.99013090133667, "learning_rate": 3.9295e-06, "loss": 0.1825, "step": 72125 }, { "epoch": 0.71, "grad_norm": 20.00226402282715, "learning_rate": 3.929068965517242e-06, "loss": 0.3869, "step": 72150 }, { "epoch": 0.71, "grad_norm": 12.264214515686035, "learning_rate": 3.928637931034483e-06, "loss": 0.2508, "step": 72175 }, { "epoch": 0.71, "grad_norm": 13.554445266723633, "learning_rate": 3.928206896551725e-06, "loss": 0.3949, "step": 72200 }, { "epoch": 0.71, "grad_norm": 4.133803367614746, "learning_rate": 3.927775862068966e-06, "loss": 0.1732, "step": 72225 }, { "epoch": 0.71, "grad_norm": 18.391881942749023, "learning_rate": 3.927344827586207e-06, "loss": 0.4711, "step": 72250 }, { "epoch": 0.71, "grad_norm": 7.21623420715332, "learning_rate": 3.926913793103448e-06, "loss": 0.221, "step": 72275 }, { "epoch": 0.71, "grad_norm": 29.062274932861328, "learning_rate": 3.92648275862069e-06, "loss": 0.3311, "step": 72300 }, { "epoch": 0.71, "grad_norm": 10.909915924072266, "learning_rate": 3.926051724137931e-06, "loss": 0.2244, "step": 72325 }, { "epoch": 0.71, "grad_norm": 19.19988441467285, "learning_rate": 3.925620689655173e-06, "loss": 0.3756, "step": 72350 }, { "epoch": 0.71, "grad_norm": 8.461525917053223, "learning_rate": 3.925189655172414e-06, "loss": 0.2189, "step": 72375 }, { "epoch": 0.71, "grad_norm": 14.059929847717285, "learning_rate": 3.924758620689655e-06, "loss": 0.3642, "step": 72400 }, { "epoch": 0.71, "grad_norm": 9.156120300292969, "learning_rate": 3.924327586206897e-06, "loss": 0.2147, "step": 72425 }, { "epoch": 0.71, "grad_norm": 16.552227020263672, "learning_rate": 3.923896551724138e-06, "loss": 0.4051, "step": 72450 }, { "epoch": 0.71, "grad_norm": 11.77768611907959, "learning_rate": 3.92346551724138e-06, "loss": 0.2619, "step": 72475 }, { "epoch": 0.71, "grad_norm": 11.012726783752441, "learning_rate": 3.923034482758621e-06, "loss": 0.3782, "step": 72500 }, { "epoch": 0.71, "grad_norm": 15.451131820678711, "learning_rate": 3.922603448275863e-06, "loss": 0.2013, "step": 72525 }, { "epoch": 0.71, "grad_norm": 15.223896026611328, "learning_rate": 3.922172413793104e-06, "loss": 0.3743, "step": 72550 }, { "epoch": 0.71, "grad_norm": 16.492504119873047, "learning_rate": 3.921741379310346e-06, "loss": 0.2396, "step": 72575 }, { "epoch": 0.71, "grad_norm": 19.4965877532959, "learning_rate": 3.921310344827586e-06, "loss": 0.4028, "step": 72600 }, { "epoch": 0.71, "grad_norm": 7.50446891784668, "learning_rate": 3.920879310344828e-06, "loss": 0.247, "step": 72625 }, { "epoch": 0.71, "grad_norm": 12.758169174194336, "learning_rate": 3.920448275862069e-06, "loss": 0.3602, "step": 72650 }, { "epoch": 0.71, "grad_norm": 13.316045761108398, "learning_rate": 3.920017241379311e-06, "loss": 0.1824, "step": 72675 }, { "epoch": 0.72, "grad_norm": 25.446706771850586, "learning_rate": 3.919586206896552e-06, "loss": 0.3893, "step": 72700 }, { "epoch": 0.72, "grad_norm": 14.236854553222656, "learning_rate": 3.9191551724137935e-06, "loss": 0.1968, "step": 72725 }, { "epoch": 0.72, "grad_norm": 20.13538932800293, "learning_rate": 3.918724137931035e-06, "loss": 0.3987, "step": 72750 }, { "epoch": 0.72, "grad_norm": 8.71830940246582, "learning_rate": 3.918293103448276e-06, "loss": 0.2345, "step": 72775 }, { "epoch": 0.72, "grad_norm": 17.45284652709961, "learning_rate": 3.917862068965518e-06, "loss": 0.4111, "step": 72800 }, { "epoch": 0.72, "grad_norm": 9.5272216796875, "learning_rate": 3.9174310344827585e-06, "loss": 0.2047, "step": 72825 }, { "epoch": 0.72, "grad_norm": 22.202392578125, "learning_rate": 3.917e-06, "loss": 0.4124, "step": 72850 }, { "epoch": 0.72, "grad_norm": 8.216097831726074, "learning_rate": 3.916568965517241e-06, "loss": 0.1703, "step": 72875 }, { "epoch": 0.72, "grad_norm": 14.325934410095215, "learning_rate": 3.916137931034483e-06, "loss": 0.346, "step": 72900 }, { "epoch": 0.72, "grad_norm": 8.88379192352295, "learning_rate": 3.915706896551724e-06, "loss": 0.2413, "step": 72925 }, { "epoch": 0.72, "grad_norm": 16.217409133911133, "learning_rate": 3.915275862068966e-06, "loss": 0.3919, "step": 72950 }, { "epoch": 0.72, "grad_norm": 12.205350875854492, "learning_rate": 3.914844827586207e-06, "loss": 0.2427, "step": 72975 }, { "epoch": 0.72, "grad_norm": 14.931974411010742, "learning_rate": 3.914413793103449e-06, "loss": 0.3493, "step": 73000 }, { "epoch": 0.72, "grad_norm": 8.379788398742676, "learning_rate": 3.91398275862069e-06, "loss": 0.2203, "step": 73025 }, { "epoch": 0.72, "grad_norm": 18.196617126464844, "learning_rate": 3.913551724137931e-06, "loss": 0.4575, "step": 73050 }, { "epoch": 0.72, "grad_norm": 8.701152801513672, "learning_rate": 3.913120689655173e-06, "loss": 0.2267, "step": 73075 }, { "epoch": 0.72, "grad_norm": 19.88412857055664, "learning_rate": 3.9126896551724145e-06, "loss": 0.3349, "step": 73100 }, { "epoch": 0.72, "grad_norm": 11.21498966217041, "learning_rate": 3.912258620689656e-06, "loss": 0.2572, "step": 73125 }, { "epoch": 0.72, "grad_norm": 22.44599151611328, "learning_rate": 3.9118275862068966e-06, "loss": 0.4111, "step": 73150 }, { "epoch": 0.72, "grad_norm": 10.381311416625977, "learning_rate": 3.911396551724138e-06, "loss": 0.2428, "step": 73175 }, { "epoch": 0.72, "grad_norm": 16.458921432495117, "learning_rate": 3.9109655172413795e-06, "loss": 0.3845, "step": 73200 }, { "epoch": 0.72, "grad_norm": 10.536080360412598, "learning_rate": 3.910534482758621e-06, "loss": 0.2279, "step": 73225 }, { "epoch": 0.72, "grad_norm": 12.489397048950195, "learning_rate": 3.910103448275862e-06, "loss": 0.3944, "step": 73250 }, { "epoch": 0.72, "grad_norm": 7.645989418029785, "learning_rate": 3.909672413793104e-06, "loss": 0.2304, "step": 73275 }, { "epoch": 0.72, "grad_norm": 13.864922523498535, "learning_rate": 3.909241379310345e-06, "loss": 0.3646, "step": 73300 }, { "epoch": 0.72, "grad_norm": 11.85644245147705, "learning_rate": 3.908810344827587e-06, "loss": 0.1972, "step": 73325 }, { "epoch": 0.72, "grad_norm": 16.975725173950195, "learning_rate": 3.908379310344828e-06, "loss": 0.3667, "step": 73350 }, { "epoch": 0.72, "grad_norm": 15.528264045715332, "learning_rate": 3.907948275862069e-06, "loss": 0.2045, "step": 73375 }, { "epoch": 0.72, "grad_norm": 15.60915756225586, "learning_rate": 3.90751724137931e-06, "loss": 0.4109, "step": 73400 }, { "epoch": 0.72, "grad_norm": 8.45878791809082, "learning_rate": 3.907086206896552e-06, "loss": 0.1773, "step": 73425 }, { "epoch": 0.72, "grad_norm": 14.78553581237793, "learning_rate": 3.906655172413793e-06, "loss": 0.3016, "step": 73450 }, { "epoch": 0.72, "grad_norm": 9.643983840942383, "learning_rate": 3.906224137931035e-06, "loss": 0.1627, "step": 73475 }, { "epoch": 0.72, "grad_norm": 9.498482704162598, "learning_rate": 3.905793103448276e-06, "loss": 0.3572, "step": 73500 }, { "epoch": 0.72, "grad_norm": 11.594776153564453, "learning_rate": 3.9053620689655176e-06, "loss": 0.216, "step": 73525 }, { "epoch": 0.72, "grad_norm": 15.761781692504883, "learning_rate": 3.904931034482759e-06, "loss": 0.4078, "step": 73550 }, { "epoch": 0.72, "grad_norm": 9.97243595123291, "learning_rate": 3.9045000000000005e-06, "loss": 0.1784, "step": 73575 }, { "epoch": 0.72, "grad_norm": 21.068836212158203, "learning_rate": 3.904068965517241e-06, "loss": 0.3992, "step": 73600 }, { "epoch": 0.72, "grad_norm": 5.666950225830078, "learning_rate": 3.9036379310344825e-06, "loss": 0.2186, "step": 73625 }, { "epoch": 0.72, "grad_norm": 12.794676780700684, "learning_rate": 3.903206896551725e-06, "loss": 0.4184, "step": 73650 }, { "epoch": 0.72, "grad_norm": 6.593671798706055, "learning_rate": 3.902775862068966e-06, "loss": 0.2209, "step": 73675 }, { "epoch": 0.72, "grad_norm": 22.143413543701172, "learning_rate": 3.902344827586208e-06, "loss": 0.3307, "step": 73700 }, { "epoch": 0.73, "grad_norm": 6.612188816070557, "learning_rate": 3.901913793103448e-06, "loss": 0.2168, "step": 73725 }, { "epoch": 0.73, "grad_norm": 21.097522735595703, "learning_rate": 3.90148275862069e-06, "loss": 0.4212, "step": 73750 }, { "epoch": 0.73, "grad_norm": 4.6903815269470215, "learning_rate": 3.901051724137931e-06, "loss": 0.2134, "step": 73775 }, { "epoch": 0.73, "grad_norm": 15.555495262145996, "learning_rate": 3.900620689655173e-06, "loss": 0.4117, "step": 73800 }, { "epoch": 0.73, "grad_norm": 8.942023277282715, "learning_rate": 3.900189655172414e-06, "loss": 0.2343, "step": 73825 }, { "epoch": 0.73, "grad_norm": 14.42923355102539, "learning_rate": 3.899758620689656e-06, "loss": 0.425, "step": 73850 }, { "epoch": 0.73, "grad_norm": 8.250872611999512, "learning_rate": 3.899327586206897e-06, "loss": 0.2107, "step": 73875 }, { "epoch": 0.73, "grad_norm": 17.29914665222168, "learning_rate": 3.8989137931034486e-06, "loss": 0.3233, "step": 73900 }, { "epoch": 0.73, "grad_norm": 12.481022834777832, "learning_rate": 3.89848275862069e-06, "loss": 0.2237, "step": 73925 }, { "epoch": 0.73, "grad_norm": 16.99465560913086, "learning_rate": 3.8980517241379315e-06, "loss": 0.3661, "step": 73950 }, { "epoch": 0.73, "grad_norm": 10.474861145019531, "learning_rate": 3.897620689655173e-06, "loss": 0.2083, "step": 73975 }, { "epoch": 0.73, "grad_norm": 19.08560562133789, "learning_rate": 3.897189655172414e-06, "loss": 0.3491, "step": 74000 }, { "epoch": 0.73, "grad_norm": 13.229839324951172, "learning_rate": 3.896758620689656e-06, "loss": 0.2066, "step": 74025 }, { "epoch": 0.73, "grad_norm": 16.855501174926758, "learning_rate": 3.8963275862068965e-06, "loss": 0.3552, "step": 74050 }, { "epoch": 0.73, "grad_norm": 8.295781135559082, "learning_rate": 3.895896551724138e-06, "loss": 0.2046, "step": 74075 }, { "epoch": 0.73, "grad_norm": 21.022716522216797, "learning_rate": 3.895465517241379e-06, "loss": 0.3571, "step": 74100 }, { "epoch": 0.73, "grad_norm": 3.3966214656829834, "learning_rate": 3.895034482758621e-06, "loss": 0.2034, "step": 74125 }, { "epoch": 0.73, "grad_norm": 18.713191986083984, "learning_rate": 3.894603448275862e-06, "loss": 0.3966, "step": 74150 }, { "epoch": 0.73, "grad_norm": 11.536535263061523, "learning_rate": 3.894172413793104e-06, "loss": 0.251, "step": 74175 }, { "epoch": 0.73, "grad_norm": 15.093184471130371, "learning_rate": 3.893741379310345e-06, "loss": 0.3382, "step": 74200 }, { "epoch": 0.73, "grad_norm": 10.438972473144531, "learning_rate": 3.893310344827587e-06, "loss": 0.1999, "step": 74225 }, { "epoch": 0.73, "grad_norm": 20.357040405273438, "learning_rate": 3.892879310344828e-06, "loss": 0.3438, "step": 74250 }, { "epoch": 0.73, "grad_norm": 9.408650398254395, "learning_rate": 3.892448275862069e-06, "loss": 0.1979, "step": 74275 }, { "epoch": 0.73, "grad_norm": 17.824859619140625, "learning_rate": 3.89201724137931e-06, "loss": 0.3791, "step": 74300 }, { "epoch": 0.73, "grad_norm": 11.648819923400879, "learning_rate": 3.8915862068965525e-06, "loss": 0.2355, "step": 74325 }, { "epoch": 0.73, "grad_norm": 17.00483512878418, "learning_rate": 3.891155172413794e-06, "loss": 0.3822, "step": 74350 }, { "epoch": 0.73, "grad_norm": 8.04583740234375, "learning_rate": 3.8907241379310345e-06, "loss": 0.2269, "step": 74375 }, { "epoch": 0.73, "grad_norm": 17.933677673339844, "learning_rate": 3.890293103448276e-06, "loss": 0.4234, "step": 74400 }, { "epoch": 0.73, "grad_norm": 11.964275360107422, "learning_rate": 3.8898620689655175e-06, "loss": 0.2507, "step": 74425 }, { "epoch": 0.73, "grad_norm": 13.329731941223145, "learning_rate": 3.889431034482759e-06, "loss": 0.3427, "step": 74450 }, { "epoch": 0.73, "grad_norm": 5.780445575714111, "learning_rate": 3.889e-06, "loss": 0.2257, "step": 74475 }, { "epoch": 0.73, "grad_norm": 18.191017150878906, "learning_rate": 3.888568965517242e-06, "loss": 0.4202, "step": 74500 }, { "epoch": 0.73, "grad_norm": 9.683279037475586, "learning_rate": 3.888137931034483e-06, "loss": 0.1946, "step": 74525 }, { "epoch": 0.73, "grad_norm": 17.684152603149414, "learning_rate": 3.887706896551725e-06, "loss": 0.4273, "step": 74550 }, { "epoch": 0.73, "grad_norm": 3.479963541030884, "learning_rate": 3.887275862068966e-06, "loss": 0.1987, "step": 74575 }, { "epoch": 0.73, "grad_norm": 22.481290817260742, "learning_rate": 3.886844827586207e-06, "loss": 0.4324, "step": 74600 }, { "epoch": 0.73, "grad_norm": 11.73055648803711, "learning_rate": 3.886413793103448e-06, "loss": 0.2195, "step": 74625 }, { "epoch": 0.73, "grad_norm": 16.57257080078125, "learning_rate": 3.88598275862069e-06, "loss": 0.3559, "step": 74650 }, { "epoch": 0.73, "grad_norm": 3.1201348304748535, "learning_rate": 3.885551724137931e-06, "loss": 0.2206, "step": 74675 }, { "epoch": 0.73, "grad_norm": 14.478012084960938, "learning_rate": 3.885120689655173e-06, "loss": 0.323, "step": 74700 }, { "epoch": 0.73, "grad_norm": 10.851395606994629, "learning_rate": 3.884689655172414e-06, "loss": 0.2352, "step": 74725 }, { "epoch": 0.74, "grad_norm": 24.323570251464844, "learning_rate": 3.8842586206896555e-06, "loss": 0.367, "step": 74750 }, { "epoch": 0.74, "grad_norm": 10.551307678222656, "learning_rate": 3.883827586206897e-06, "loss": 0.2129, "step": 74775 }, { "epoch": 0.74, "grad_norm": 23.831165313720703, "learning_rate": 3.8833965517241385e-06, "loss": 0.4025, "step": 74800 }, { "epoch": 0.74, "grad_norm": 9.457107543945312, "learning_rate": 3.882965517241379e-06, "loss": 0.226, "step": 74825 }, { "epoch": 0.74, "grad_norm": 11.123676300048828, "learning_rate": 3.8825344827586205e-06, "loss": 0.4019, "step": 74850 }, { "epoch": 0.74, "grad_norm": 9.754182815551758, "learning_rate": 3.882103448275862e-06, "loss": 0.1575, "step": 74875 }, { "epoch": 0.74, "grad_norm": 22.658985137939453, "learning_rate": 3.881672413793104e-06, "loss": 0.4513, "step": 74900 }, { "epoch": 0.74, "grad_norm": 5.108288764953613, "learning_rate": 3.881241379310346e-06, "loss": 0.2219, "step": 74925 }, { "epoch": 0.74, "grad_norm": 14.025970458984375, "learning_rate": 3.880810344827586e-06, "loss": 0.3554, "step": 74950 }, { "epoch": 0.74, "grad_norm": 12.115516662597656, "learning_rate": 3.880379310344828e-06, "loss": 0.2194, "step": 74975 }, { "epoch": 0.74, "grad_norm": 23.963760375976562, "learning_rate": 3.879948275862069e-06, "loss": 0.3856, "step": 75000 }, { "epoch": 0.74, "grad_norm": 5.699097156524658, "learning_rate": 3.879517241379311e-06, "loss": 0.201, "step": 75025 }, { "epoch": 0.74, "grad_norm": 16.915557861328125, "learning_rate": 3.879086206896552e-06, "loss": 0.4314, "step": 75050 }, { "epoch": 0.74, "grad_norm": 8.134355545043945, "learning_rate": 3.878655172413794e-06, "loss": 0.2134, "step": 75075 }, { "epoch": 0.74, "grad_norm": 16.446762084960938, "learning_rate": 3.878224137931035e-06, "loss": 0.3816, "step": 75100 }, { "epoch": 0.74, "grad_norm": 9.426884651184082, "learning_rate": 3.8777931034482765e-06, "loss": 0.2553, "step": 75125 }, { "epoch": 0.74, "grad_norm": 16.85102653503418, "learning_rate": 3.877362068965518e-06, "loss": 0.3397, "step": 75150 }, { "epoch": 0.74, "grad_norm": 5.041052341461182, "learning_rate": 3.876931034482759e-06, "loss": 0.2373, "step": 75175 }, { "epoch": 0.74, "grad_norm": 4.093904972076416, "learning_rate": 3.8765e-06, "loss": 0.3741, "step": 75200 }, { "epoch": 0.74, "grad_norm": 15.68952751159668, "learning_rate": 3.8760689655172415e-06, "loss": 0.2421, "step": 75225 }, { "epoch": 0.74, "grad_norm": 19.351219177246094, "learning_rate": 3.875637931034483e-06, "loss": 0.369, "step": 75250 }, { "epoch": 0.74, "grad_norm": 8.455098152160645, "learning_rate": 3.8752068965517244e-06, "loss": 0.2263, "step": 75275 }, { "epoch": 0.74, "grad_norm": 15.13259506225586, "learning_rate": 3.874775862068966e-06, "loss": 0.404, "step": 75300 }, { "epoch": 0.74, "grad_norm": 11.72301197052002, "learning_rate": 3.874344827586207e-06, "loss": 0.2291, "step": 75325 }, { "epoch": 0.74, "grad_norm": 15.87276554107666, "learning_rate": 3.873913793103449e-06, "loss": 0.4322, "step": 75350 }, { "epoch": 0.74, "grad_norm": 7.71908712387085, "learning_rate": 3.87348275862069e-06, "loss": 0.1952, "step": 75375 }, { "epoch": 0.74, "grad_norm": 22.192882537841797, "learning_rate": 3.873051724137931e-06, "loss": 0.3857, "step": 75400 }, { "epoch": 0.74, "grad_norm": 6.538609981536865, "learning_rate": 3.872620689655172e-06, "loss": 0.2132, "step": 75425 }, { "epoch": 0.74, "grad_norm": 13.208475112915039, "learning_rate": 3.872189655172414e-06, "loss": 0.3342, "step": 75450 }, { "epoch": 0.74, "grad_norm": 11.196547508239746, "learning_rate": 3.871758620689656e-06, "loss": 0.2106, "step": 75475 }, { "epoch": 0.74, "grad_norm": 17.1752872467041, "learning_rate": 3.871327586206897e-06, "loss": 0.3795, "step": 75500 }, { "epoch": 0.74, "grad_norm": 12.834731101989746, "learning_rate": 3.870896551724138e-06, "loss": 0.1861, "step": 75525 }, { "epoch": 0.74, "grad_norm": 21.7802791595459, "learning_rate": 3.87046551724138e-06, "loss": 0.3994, "step": 75550 }, { "epoch": 0.74, "grad_norm": 4.2795281410217285, "learning_rate": 3.870034482758621e-06, "loss": 0.1863, "step": 75575 }, { "epoch": 0.74, "grad_norm": 9.41684627532959, "learning_rate": 3.8696034482758625e-06, "loss": 0.3551, "step": 75600 }, { "epoch": 0.74, "grad_norm": 12.619595527648926, "learning_rate": 3.869172413793104e-06, "loss": 0.223, "step": 75625 }, { "epoch": 0.74, "grad_norm": 12.795642852783203, "learning_rate": 3.8687413793103454e-06, "loss": 0.3866, "step": 75650 }, { "epoch": 0.74, "grad_norm": 5.653290748596191, "learning_rate": 3.868310344827587e-06, "loss": 0.2262, "step": 75675 }, { "epoch": 0.74, "grad_norm": 16.313831329345703, "learning_rate": 3.867879310344828e-06, "loss": 0.3503, "step": 75700 }, { "epoch": 0.74, "grad_norm": 8.958145141601562, "learning_rate": 3.867448275862069e-06, "loss": 0.2083, "step": 75725 }, { "epoch": 0.75, "grad_norm": 12.508913040161133, "learning_rate": 3.86701724137931e-06, "loss": 0.3798, "step": 75750 }, { "epoch": 0.75, "grad_norm": 8.296215057373047, "learning_rate": 3.866586206896552e-06, "loss": 0.1973, "step": 75775 }, { "epoch": 0.75, "grad_norm": 14.768019676208496, "learning_rate": 3.866155172413793e-06, "loss": 0.3362, "step": 75800 }, { "epoch": 0.75, "grad_norm": 5.438252925872803, "learning_rate": 3.865724137931035e-06, "loss": 0.1906, "step": 75825 }, { "epoch": 0.75, "grad_norm": 21.276193618774414, "learning_rate": 3.865293103448276e-06, "loss": 0.3916, "step": 75850 }, { "epoch": 0.75, "grad_norm": 4.995226860046387, "learning_rate": 3.864862068965518e-06, "loss": 0.2276, "step": 75875 }, { "epoch": 0.75, "grad_norm": 15.813039779663086, "learning_rate": 3.864448275862069e-06, "loss": 0.3863, "step": 75900 }, { "epoch": 0.75, "grad_norm": 7.097251892089844, "learning_rate": 3.864017241379311e-06, "loss": 0.2003, "step": 75925 }, { "epoch": 0.75, "grad_norm": 17.59756851196289, "learning_rate": 3.863586206896552e-06, "loss": 0.4089, "step": 75950 }, { "epoch": 0.75, "grad_norm": 19.69261932373047, "learning_rate": 3.8631551724137935e-06, "loss": 0.203, "step": 75975 }, { "epoch": 0.75, "grad_norm": 15.8082857131958, "learning_rate": 3.862724137931035e-06, "loss": 0.4055, "step": 76000 }, { "epoch": 0.75, "grad_norm": 13.03317642211914, "learning_rate": 3.8622931034482764e-06, "loss": 0.199, "step": 76025 }, { "epoch": 0.75, "grad_norm": 5.997268199920654, "learning_rate": 3.861862068965517e-06, "loss": 0.3606, "step": 76050 }, { "epoch": 0.75, "grad_norm": 14.96776008605957, "learning_rate": 3.8614310344827585e-06, "loss": 0.2202, "step": 76075 }, { "epoch": 0.75, "grad_norm": 17.09507942199707, "learning_rate": 3.861e-06, "loss": 0.4176, "step": 76100 }, { "epoch": 0.75, "grad_norm": 9.91793155670166, "learning_rate": 3.860568965517241e-06, "loss": 0.2286, "step": 76125 }, { "epoch": 0.75, "grad_norm": 16.615556716918945, "learning_rate": 3.860137931034484e-06, "loss": 0.3835, "step": 76150 }, { "epoch": 0.75, "grad_norm": 12.187370300292969, "learning_rate": 3.859706896551724e-06, "loss": 0.1886, "step": 76175 }, { "epoch": 0.75, "grad_norm": 18.12975311279297, "learning_rate": 3.859275862068966e-06, "loss": 0.3828, "step": 76200 }, { "epoch": 0.75, "grad_norm": 8.748442649841309, "learning_rate": 3.858844827586207e-06, "loss": 0.2346, "step": 76225 }, { "epoch": 0.75, "grad_norm": 14.766740798950195, "learning_rate": 3.858413793103449e-06, "loss": 0.3487, "step": 76250 }, { "epoch": 0.75, "grad_norm": 7.829009532928467, "learning_rate": 3.857982758620689e-06, "loss": 0.2441, "step": 76275 }, { "epoch": 0.75, "grad_norm": 16.92831039428711, "learning_rate": 3.857551724137932e-06, "loss": 0.3894, "step": 76300 }, { "epoch": 0.75, "grad_norm": 11.50602912902832, "learning_rate": 3.857120689655173e-06, "loss": 0.2066, "step": 76325 }, { "epoch": 0.75, "grad_norm": 19.75889015197754, "learning_rate": 3.8566896551724145e-06, "loss": 0.3803, "step": 76350 }, { "epoch": 0.75, "grad_norm": 10.201807022094727, "learning_rate": 3.856258620689656e-06, "loss": 0.19, "step": 76375 }, { "epoch": 0.75, "grad_norm": 16.576087951660156, "learning_rate": 3.855827586206897e-06, "loss": 0.4548, "step": 76400 }, { "epoch": 0.75, "grad_norm": 9.81874942779541, "learning_rate": 3.855396551724138e-06, "loss": 0.2549, "step": 76425 }, { "epoch": 0.75, "grad_norm": 19.271263122558594, "learning_rate": 3.8549655172413795e-06, "loss": 0.3875, "step": 76450 }, { "epoch": 0.75, "grad_norm": 5.6923041343688965, "learning_rate": 3.854534482758621e-06, "loss": 0.2091, "step": 76475 }, { "epoch": 0.75, "grad_norm": 23.777332305908203, "learning_rate": 3.854103448275862e-06, "loss": 0.3043, "step": 76500 }, { "epoch": 0.75, "grad_norm": 0.5218271613121033, "learning_rate": 3.853672413793104e-06, "loss": 0.2599, "step": 76525 }, { "epoch": 0.75, "grad_norm": 24.003721237182617, "learning_rate": 3.853241379310345e-06, "loss": 0.4048, "step": 76550 }, { "epoch": 0.75, "grad_norm": 16.21873664855957, "learning_rate": 3.852810344827587e-06, "loss": 0.234, "step": 76575 }, { "epoch": 0.75, "grad_norm": 14.099286079406738, "learning_rate": 3.852379310344828e-06, "loss": 0.4343, "step": 76600 }, { "epoch": 0.75, "grad_norm": 7.012333869934082, "learning_rate": 3.851948275862069e-06, "loss": 0.2465, "step": 76625 }, { "epoch": 0.75, "grad_norm": 20.374526977539062, "learning_rate": 3.85151724137931e-06, "loss": 0.3865, "step": 76650 }, { "epoch": 0.75, "grad_norm": 12.559002876281738, "learning_rate": 3.851086206896552e-06, "loss": 0.221, "step": 76675 }, { "epoch": 0.75, "grad_norm": 16.06046485900879, "learning_rate": 3.850655172413793e-06, "loss": 0.3233, "step": 76700 }, { "epoch": 0.75, "grad_norm": 13.107110023498535, "learning_rate": 3.850224137931035e-06, "loss": 0.2305, "step": 76725 }, { "epoch": 0.75, "grad_norm": 24.079341888427734, "learning_rate": 3.849793103448276e-06, "loss": 0.4321, "step": 76750 }, { "epoch": 0.76, "grad_norm": 14.802623748779297, "learning_rate": 3.849362068965518e-06, "loss": 0.1994, "step": 76775 }, { "epoch": 0.76, "grad_norm": 18.827735900878906, "learning_rate": 3.848931034482759e-06, "loss": 0.3898, "step": 76800 }, { "epoch": 0.76, "grad_norm": 10.508631706237793, "learning_rate": 3.8485000000000005e-06, "loss": 0.2103, "step": 76825 }, { "epoch": 0.76, "grad_norm": 19.75594139099121, "learning_rate": 3.848068965517241e-06, "loss": 0.3985, "step": 76850 }, { "epoch": 0.76, "grad_norm": 11.821717262268066, "learning_rate": 3.847637931034483e-06, "loss": 0.1954, "step": 76875 }, { "epoch": 0.76, "grad_norm": 16.5184383392334, "learning_rate": 3.847206896551725e-06, "loss": 0.445, "step": 76900 }, { "epoch": 0.76, "grad_norm": 3.602663993835449, "learning_rate": 3.846775862068966e-06, "loss": 0.2264, "step": 76925 }, { "epoch": 0.76, "grad_norm": 14.784280776977539, "learning_rate": 3.846344827586207e-06, "loss": 0.3547, "step": 76950 }, { "epoch": 0.76, "grad_norm": 6.471492290496826, "learning_rate": 3.845913793103448e-06, "loss": 0.2143, "step": 76975 }, { "epoch": 0.76, "grad_norm": 17.26978874206543, "learning_rate": 3.84548275862069e-06, "loss": 0.3561, "step": 77000 }, { "epoch": 0.76, "grad_norm": 8.038079261779785, "learning_rate": 3.845051724137931e-06, "loss": 0.2108, "step": 77025 }, { "epoch": 0.76, "grad_norm": 20.66030502319336, "learning_rate": 3.844620689655173e-06, "loss": 0.4435, "step": 77050 }, { "epoch": 0.76, "grad_norm": 5.3788838386535645, "learning_rate": 3.844189655172414e-06, "loss": 0.2212, "step": 77075 }, { "epoch": 0.76, "grad_norm": 12.059304237365723, "learning_rate": 3.843758620689656e-06, "loss": 0.3614, "step": 77100 }, { "epoch": 0.76, "grad_norm": 13.55563735961914, "learning_rate": 3.843327586206897e-06, "loss": 0.2073, "step": 77125 }, { "epoch": 0.76, "grad_norm": 19.958494186401367, "learning_rate": 3.842896551724139e-06, "loss": 0.3592, "step": 77150 }, { "epoch": 0.76, "grad_norm": 9.142690658569336, "learning_rate": 3.842465517241379e-06, "loss": 0.2406, "step": 77175 }, { "epoch": 0.76, "grad_norm": 19.428585052490234, "learning_rate": 3.842034482758621e-06, "loss": 0.4108, "step": 77200 }, { "epoch": 0.76, "grad_norm": 9.340885162353516, "learning_rate": 3.841603448275862e-06, "loss": 0.227, "step": 77225 }, { "epoch": 0.76, "grad_norm": 21.69589614868164, "learning_rate": 3.8411724137931036e-06, "loss": 0.4043, "step": 77250 }, { "epoch": 0.76, "grad_norm": 8.948933601379395, "learning_rate": 3.840741379310345e-06, "loss": 0.2437, "step": 77275 }, { "epoch": 0.76, "grad_norm": 23.190185546875, "learning_rate": 3.8403103448275865e-06, "loss": 0.364, "step": 77300 }, { "epoch": 0.76, "grad_norm": 9.118978500366211, "learning_rate": 3.839879310344828e-06, "loss": 0.1884, "step": 77325 }, { "epoch": 0.76, "grad_norm": 5.973205089569092, "learning_rate": 3.839448275862069e-06, "loss": 0.3806, "step": 77350 }, { "epoch": 0.76, "grad_norm": 15.148404121398926, "learning_rate": 3.839017241379311e-06, "loss": 0.2202, "step": 77375 }, { "epoch": 0.76, "grad_norm": 15.218494415283203, "learning_rate": 3.8385862068965515e-06, "loss": 0.3937, "step": 77400 }, { "epoch": 0.76, "grad_norm": 9.401581764221191, "learning_rate": 3.838155172413793e-06, "loss": 0.2138, "step": 77425 }, { "epoch": 0.76, "grad_norm": 17.334678649902344, "learning_rate": 3.837724137931035e-06, "loss": 0.3629, "step": 77450 }, { "epoch": 0.76, "grad_norm": 10.748231887817383, "learning_rate": 3.837293103448277e-06, "loss": 0.2332, "step": 77475 }, { "epoch": 0.76, "grad_norm": 12.559231758117676, "learning_rate": 3.836862068965518e-06, "loss": 0.3934, "step": 77500 }, { "epoch": 0.76, "grad_norm": 8.755498886108398, "learning_rate": 3.836431034482759e-06, "loss": 0.2436, "step": 77525 }, { "epoch": 0.76, "grad_norm": 15.899897575378418, "learning_rate": 3.836e-06, "loss": 0.4008, "step": 77550 }, { "epoch": 0.76, "grad_norm": 12.51085376739502, "learning_rate": 3.835568965517242e-06, "loss": 0.2149, "step": 77575 }, { "epoch": 0.76, "grad_norm": 20.146421432495117, "learning_rate": 3.835137931034483e-06, "loss": 0.3528, "step": 77600 }, { "epoch": 0.76, "grad_norm": 2.9830093383789062, "learning_rate": 3.8347068965517246e-06, "loss": 0.2094, "step": 77625 }, { "epoch": 0.76, "grad_norm": 16.224821090698242, "learning_rate": 3.834275862068966e-06, "loss": 0.4436, "step": 77650 }, { "epoch": 0.76, "grad_norm": 6.398551940917969, "learning_rate": 3.8338448275862075e-06, "loss": 0.2111, "step": 77675 }, { "epoch": 0.76, "grad_norm": 15.201428413391113, "learning_rate": 3.833413793103449e-06, "loss": 0.3909, "step": 77700 }, { "epoch": 0.76, "grad_norm": 19.87270736694336, "learning_rate": 3.8329827586206896e-06, "loss": 0.2304, "step": 77725 }, { "epoch": 0.76, "grad_norm": 16.37955093383789, "learning_rate": 3.832551724137931e-06, "loss": 0.419, "step": 77750 }, { "epoch": 0.76, "grad_norm": 11.578036308288574, "learning_rate": 3.8321206896551725e-06, "loss": 0.228, "step": 77775 }, { "epoch": 0.77, "grad_norm": 19.06716537475586, "learning_rate": 3.831689655172414e-06, "loss": 0.4109, "step": 77800 }, { "epoch": 0.77, "grad_norm": 8.782447814941406, "learning_rate": 3.831258620689655e-06, "loss": 0.2062, "step": 77825 }, { "epoch": 0.77, "grad_norm": 18.198863983154297, "learning_rate": 3.830827586206897e-06, "loss": 0.3775, "step": 77850 }, { "epoch": 0.77, "grad_norm": 5.4776763916015625, "learning_rate": 3.830396551724138e-06, "loss": 0.2815, "step": 77875 }, { "epoch": 0.77, "grad_norm": 19.538658142089844, "learning_rate": 3.82996551724138e-06, "loss": 0.4092, "step": 77900 }, { "epoch": 0.77, "grad_norm": 8.860326766967773, "learning_rate": 3.829534482758621e-06, "loss": 0.2235, "step": 77925 }, { "epoch": 0.77, "grad_norm": 14.58180046081543, "learning_rate": 3.829103448275862e-06, "loss": 0.3974, "step": 77950 }, { "epoch": 0.77, "grad_norm": 6.884144306182861, "learning_rate": 3.828672413793103e-06, "loss": 0.1769, "step": 77975 }, { "epoch": 0.77, "grad_norm": Infinity, "learning_rate": 3.8282586206896556e-06, "loss": 0.392, "step": 78000 }, { "epoch": 0.77, "grad_norm": 4.580502033233643, "learning_rate": 3.827827586206897e-06, "loss": 0.2235, "step": 78025 }, { "epoch": 0.77, "grad_norm": 16.886005401611328, "learning_rate": 3.8273965517241385e-06, "loss": 0.3516, "step": 78050 }, { "epoch": 0.77, "grad_norm": 11.046525955200195, "learning_rate": 3.826965517241379e-06, "loss": 0.2365, "step": 78075 }, { "epoch": 0.77, "grad_norm": 15.530466079711914, "learning_rate": 3.8265344827586206e-06, "loss": 0.3797, "step": 78100 }, { "epoch": 0.77, "grad_norm": 10.90710163116455, "learning_rate": 3.826103448275863e-06, "loss": 0.2427, "step": 78125 }, { "epoch": 0.77, "grad_norm": 16.456724166870117, "learning_rate": 3.825672413793104e-06, "loss": 0.3402, "step": 78150 }, { "epoch": 0.77, "grad_norm": 14.904948234558105, "learning_rate": 3.825241379310345e-06, "loss": 0.2698, "step": 78175 }, { "epoch": 0.77, "grad_norm": 10.622127532958984, "learning_rate": 3.824810344827586e-06, "loss": 0.2847, "step": 78200 }, { "epoch": 0.77, "grad_norm": 5.998190402984619, "learning_rate": 3.824379310344828e-06, "loss": 0.2137, "step": 78225 }, { "epoch": 0.77, "grad_norm": 12.26526927947998, "learning_rate": 3.823948275862069e-06, "loss": 0.3995, "step": 78250 }, { "epoch": 0.77, "grad_norm": 12.658390045166016, "learning_rate": 3.823517241379311e-06, "loss": 0.1927, "step": 78275 }, { "epoch": 0.77, "grad_norm": 20.616113662719727, "learning_rate": 3.823086206896552e-06, "loss": 0.3327, "step": 78300 }, { "epoch": 0.77, "grad_norm": 10.703880310058594, "learning_rate": 3.822655172413794e-06, "loss": 0.2019, "step": 78325 }, { "epoch": 0.77, "grad_norm": 22.586933135986328, "learning_rate": 3.822224137931035e-06, "loss": 0.427, "step": 78350 }, { "epoch": 0.77, "grad_norm": 11.93077564239502, "learning_rate": 3.8217931034482766e-06, "loss": 0.2148, "step": 78375 }, { "epoch": 0.77, "grad_norm": 16.635343551635742, "learning_rate": 3.821362068965517e-06, "loss": 0.3846, "step": 78400 }, { "epoch": 0.77, "grad_norm": 11.383033752441406, "learning_rate": 3.820931034482759e-06, "loss": 0.221, "step": 78425 }, { "epoch": 0.77, "grad_norm": 23.592153549194336, "learning_rate": 3.8205e-06, "loss": 0.4219, "step": 78450 }, { "epoch": 0.77, "grad_norm": 11.973484992980957, "learning_rate": 3.8200689655172416e-06, "loss": 0.1911, "step": 78475 }, { "epoch": 0.77, "grad_norm": 14.90563678741455, "learning_rate": 3.819637931034483e-06, "loss": 0.353, "step": 78500 }, { "epoch": 0.77, "grad_norm": 8.366264343261719, "learning_rate": 3.8192068965517245e-06, "loss": 0.1872, "step": 78525 }, { "epoch": 0.77, "grad_norm": 19.77849769592285, "learning_rate": 3.818775862068966e-06, "loss": 0.3884, "step": 78550 }, { "epoch": 0.77, "grad_norm": 7.434729099273682, "learning_rate": 3.818344827586207e-06, "loss": 0.1867, "step": 78575 }, { "epoch": 0.77, "grad_norm": 23.150739669799805, "learning_rate": 3.817913793103449e-06, "loss": 0.4498, "step": 78600 }, { "epoch": 0.77, "grad_norm": 12.166155815124512, "learning_rate": 3.8174827586206894e-06, "loss": 0.2225, "step": 78625 }, { "epoch": 0.77, "grad_norm": 12.161020278930664, "learning_rate": 3.817051724137931e-06, "loss": 0.4185, "step": 78650 }, { "epoch": 0.77, "grad_norm": 16.70989990234375, "learning_rate": 3.816620689655172e-06, "loss": 0.2122, "step": 78675 }, { "epoch": 0.77, "grad_norm": 18.8909969329834, "learning_rate": 3.816189655172415e-06, "loss": 0.4401, "step": 78700 }, { "epoch": 0.77, "grad_norm": 12.984563827514648, "learning_rate": 3.815758620689656e-06, "loss": 0.2162, "step": 78725 }, { "epoch": 0.77, "grad_norm": 15.654870986938477, "learning_rate": 3.815327586206897e-06, "loss": 0.3914, "step": 78750 }, { "epoch": 0.77, "grad_norm": 11.566092491149902, "learning_rate": 3.814896551724138e-06, "loss": 0.21, "step": 78775 }, { "epoch": 0.78, "grad_norm": 16.518226623535156, "learning_rate": 3.8144655172413796e-06, "loss": 0.4444, "step": 78800 }, { "epoch": 0.78, "grad_norm": 6.181626319885254, "learning_rate": 3.814034482758621e-06, "loss": 0.2251, "step": 78825 }, { "epoch": 0.78, "grad_norm": 21.627216339111328, "learning_rate": 3.813603448275862e-06, "loss": 0.3578, "step": 78850 }, { "epoch": 0.78, "grad_norm": 7.4994707107543945, "learning_rate": 3.8131724137931036e-06, "loss": 0.2218, "step": 78875 }, { "epoch": 0.78, "grad_norm": 15.39590835571289, "learning_rate": 3.812741379310345e-06, "loss": 0.3816, "step": 78900 }, { "epoch": 0.78, "grad_norm": 9.209046363830566, "learning_rate": 3.8123103448275865e-06, "loss": 0.2447, "step": 78925 }, { "epoch": 0.78, "grad_norm": 21.212505340576172, "learning_rate": 3.811879310344828e-06, "loss": 0.424, "step": 78950 }, { "epoch": 0.78, "grad_norm": 7.9753594398498535, "learning_rate": 3.811448275862069e-06, "loss": 0.2032, "step": 78975 }, { "epoch": 0.78, "grad_norm": 20.16579818725586, "learning_rate": 3.8110172413793104e-06, "loss": 0.4237, "step": 79000 }, { "epoch": 0.78, "grad_norm": 7.272793292999268, "learning_rate": 3.8105862068965523e-06, "loss": 0.2176, "step": 79025 }, { "epoch": 0.78, "grad_norm": 25.073009490966797, "learning_rate": 3.8101551724137938e-06, "loss": 0.3937, "step": 79050 }, { "epoch": 0.78, "grad_norm": 9.847756385803223, "learning_rate": 3.8097241379310344e-06, "loss": 0.2439, "step": 79075 }, { "epoch": 0.78, "grad_norm": 14.250537872314453, "learning_rate": 3.8092931034482763e-06, "loss": 0.4524, "step": 79100 }, { "epoch": 0.78, "grad_norm": 11.859148979187012, "learning_rate": 3.8088620689655177e-06, "loss": 0.2013, "step": 79125 }, { "epoch": 0.78, "grad_norm": 18.753671646118164, "learning_rate": 3.808431034482759e-06, "loss": 0.3708, "step": 79150 }, { "epoch": 0.78, "grad_norm": 10.869102478027344, "learning_rate": 3.8080000000000006e-06, "loss": 0.176, "step": 79175 }, { "epoch": 0.78, "grad_norm": 19.131942749023438, "learning_rate": 3.8075689655172417e-06, "loss": 0.3396, "step": 79200 }, { "epoch": 0.78, "grad_norm": 7.038254261016846, "learning_rate": 3.807137931034483e-06, "loss": 0.1965, "step": 79225 }, { "epoch": 0.78, "grad_norm": 13.269586563110352, "learning_rate": 3.8067068965517246e-06, "loss": 0.3495, "step": 79250 }, { "epoch": 0.78, "grad_norm": 9.6829833984375, "learning_rate": 3.806275862068966e-06, "loss": 0.237, "step": 79275 }, { "epoch": 0.78, "grad_norm": 20.257701873779297, "learning_rate": 3.805844827586207e-06, "loss": 0.4704, "step": 79300 }, { "epoch": 0.78, "grad_norm": 5.945058822631836, "learning_rate": 3.8054137931034485e-06, "loss": 0.2192, "step": 79325 }, { "epoch": 0.78, "grad_norm": 17.46072006225586, "learning_rate": 3.80498275862069e-06, "loss": 0.3243, "step": 79350 }, { "epoch": 0.78, "grad_norm": 13.049192428588867, "learning_rate": 3.8045517241379314e-06, "loss": 0.2326, "step": 79375 }, { "epoch": 0.78, "grad_norm": 31.318235397338867, "learning_rate": 3.8041206896551725e-06, "loss": 0.393, "step": 79400 }, { "epoch": 0.78, "grad_norm": 11.462613105773926, "learning_rate": 3.803689655172414e-06, "loss": 0.2058, "step": 79425 }, { "epoch": 0.78, "grad_norm": 25.983230590820312, "learning_rate": 3.8032586206896554e-06, "loss": 0.3928, "step": 79450 }, { "epoch": 0.78, "grad_norm": 9.069635391235352, "learning_rate": 3.802827586206897e-06, "loss": 0.2208, "step": 79475 }, { "epoch": 0.78, "grad_norm": 10.972640037536621, "learning_rate": 3.8023965517241383e-06, "loss": 0.3399, "step": 79500 }, { "epoch": 0.78, "grad_norm": 4.994297504425049, "learning_rate": 3.8019655172413793e-06, "loss": 0.1943, "step": 79525 }, { "epoch": 0.78, "grad_norm": 14.356913566589355, "learning_rate": 3.801534482758621e-06, "loss": 0.3963, "step": 79550 }, { "epoch": 0.78, "grad_norm": 7.951560974121094, "learning_rate": 3.8011034482758623e-06, "loss": 0.1931, "step": 79575 }, { "epoch": 0.78, "grad_norm": 17.706815719604492, "learning_rate": 3.800672413793104e-06, "loss": 0.3864, "step": 79600 }, { "epoch": 0.78, "grad_norm": 12.834417343139648, "learning_rate": 3.8002413793103447e-06, "loss": 0.271, "step": 79625 }, { "epoch": 0.78, "grad_norm": 9.2623929977417, "learning_rate": 3.799810344827586e-06, "loss": 0.3968, "step": 79650 }, { "epoch": 0.78, "grad_norm": 10.373086929321289, "learning_rate": 3.799379310344828e-06, "loss": 0.261, "step": 79675 }, { "epoch": 0.78, "grad_norm": 18.131826400756836, "learning_rate": 3.7989482758620695e-06, "loss": 0.4086, "step": 79700 }, { "epoch": 0.78, "grad_norm": 9.764466285705566, "learning_rate": 3.798517241379311e-06, "loss": 0.2419, "step": 79725 }, { "epoch": 0.78, "grad_norm": 17.64417839050293, "learning_rate": 3.798086206896552e-06, "loss": 0.3985, "step": 79750 }, { "epoch": 0.78, "grad_norm": 3.755980968475342, "learning_rate": 3.7976551724137935e-06, "loss": 0.2336, "step": 79775 }, { "epoch": 0.78, "grad_norm": 11.731128692626953, "learning_rate": 3.797224137931035e-06, "loss": 0.3548, "step": 79800 }, { "epoch": 0.79, "grad_norm": 2.663895606994629, "learning_rate": 3.7967931034482764e-06, "loss": 0.2119, "step": 79825 }, { "epoch": 0.79, "grad_norm": 15.063030242919922, "learning_rate": 3.7963620689655174e-06, "loss": 0.427, "step": 79850 }, { "epoch": 0.79, "grad_norm": 8.570902824401855, "learning_rate": 3.795931034482759e-06, "loss": 0.2523, "step": 79875 }, { "epoch": 0.79, "grad_norm": 20.43361473083496, "learning_rate": 3.7955000000000003e-06, "loss": 0.3862, "step": 79900 }, { "epoch": 0.79, "grad_norm": 17.339824676513672, "learning_rate": 3.795068965517242e-06, "loss": 0.2396, "step": 79925 }, { "epoch": 0.79, "grad_norm": 20.604248046875, "learning_rate": 3.7946379310344832e-06, "loss": 0.448, "step": 79950 }, { "epoch": 0.79, "grad_norm": 3.6047396659851074, "learning_rate": 3.7942068965517243e-06, "loss": 0.1998, "step": 79975 }, { "epoch": 0.79, "grad_norm": 27.308135986328125, "learning_rate": 3.7937758620689657e-06, "loss": 0.3452, "step": 80000 }, { "epoch": 0.79, "eval_loss": 0.4677213132381439, "eval_runtime": 5709.2185, "eval_samples_per_second": 1.658, "eval_steps_per_second": 0.207, "eval_wer": 0.13918753621322347, "step": 80000 }, { "epoch": 0.79, "grad_norm": 10.571468353271484, "learning_rate": 3.793344827586207e-06, "loss": 0.1705, "step": 80025 }, { "epoch": 0.79, "grad_norm": 23.163549423217773, "learning_rate": 3.7929137931034487e-06, "loss": 0.4145, "step": 80050 }, { "epoch": 0.79, "grad_norm": 9.786457061767578, "learning_rate": 3.7924827586206897e-06, "loss": 0.2114, "step": 80075 }, { "epoch": 0.79, "grad_norm": 19.529987335205078, "learning_rate": 3.792051724137931e-06, "loss": 0.3772, "step": 80100 }, { "epoch": 0.79, "grad_norm": 7.464859485626221, "learning_rate": 3.7916206896551726e-06, "loss": 0.2193, "step": 80125 }, { "epoch": 0.79, "grad_norm": 21.19687271118164, "learning_rate": 3.791189655172414e-06, "loss": 0.3843, "step": 80150 }, { "epoch": 0.79, "grad_norm": 8.84427261352539, "learning_rate": 3.7907586206896555e-06, "loss": 0.2884, "step": 80175 }, { "epoch": 0.79, "grad_norm": 16.607746124267578, "learning_rate": 3.790344827586207e-06, "loss": 0.3737, "step": 80200 }, { "epoch": 0.79, "grad_norm": 7.181899070739746, "learning_rate": 3.7899137931034484e-06, "loss": 0.2335, "step": 80225 }, { "epoch": 0.79, "grad_norm": 13.541332244873047, "learning_rate": 3.78948275862069e-06, "loss": 0.3545, "step": 80250 }, { "epoch": 0.79, "grad_norm": 8.02812671661377, "learning_rate": 3.7890517241379313e-06, "loss": 0.2168, "step": 80275 }, { "epoch": 0.79, "grad_norm": 19.824216842651367, "learning_rate": 3.7886206896551724e-06, "loss": 0.4224, "step": 80300 }, { "epoch": 0.79, "grad_norm": 10.019057273864746, "learning_rate": 3.788189655172414e-06, "loss": 0.2531, "step": 80325 }, { "epoch": 0.79, "grad_norm": 8.362988471984863, "learning_rate": 3.7877586206896553e-06, "loss": 0.2986, "step": 80350 }, { "epoch": 0.79, "grad_norm": 5.846071720123291, "learning_rate": 3.787327586206897e-06, "loss": 0.226, "step": 80375 }, { "epoch": 0.79, "grad_norm": 16.54294776916504, "learning_rate": 3.7868965517241386e-06, "loss": 0.3882, "step": 80400 }, { "epoch": 0.79, "grad_norm": 7.491347312927246, "learning_rate": 3.7864655172413797e-06, "loss": 0.2526, "step": 80425 }, { "epoch": 0.79, "grad_norm": 10.804394721984863, "learning_rate": 3.786034482758621e-06, "loss": 0.3507, "step": 80450 }, { "epoch": 0.79, "grad_norm": 9.863417625427246, "learning_rate": 3.7856034482758626e-06, "loss": 0.2163, "step": 80475 }, { "epoch": 0.79, "grad_norm": 10.801508903503418, "learning_rate": 3.785172413793104e-06, "loss": 0.3839, "step": 80500 }, { "epoch": 0.79, "grad_norm": 11.782088279724121, "learning_rate": 3.784741379310345e-06, "loss": 0.2359, "step": 80525 }, { "epoch": 0.79, "grad_norm": 16.04340934753418, "learning_rate": 3.7843103448275865e-06, "loss": 0.3723, "step": 80550 }, { "epoch": 0.79, "grad_norm": 13.56645393371582, "learning_rate": 3.783879310344828e-06, "loss": 0.2287, "step": 80575 }, { "epoch": 0.79, "grad_norm": 18.87079620361328, "learning_rate": 3.7834482758620694e-06, "loss": 0.4382, "step": 80600 }, { "epoch": 0.79, "grad_norm": 9.848071098327637, "learning_rate": 3.783017241379311e-06, "loss": 0.2267, "step": 80625 }, { "epoch": 0.79, "grad_norm": 16.274293899536133, "learning_rate": 3.782586206896552e-06, "loss": 0.3426, "step": 80650 }, { "epoch": 0.79, "grad_norm": 4.906290531158447, "learning_rate": 3.7821551724137934e-06, "loss": 0.2063, "step": 80675 }, { "epoch": 0.79, "grad_norm": 18.6780948638916, "learning_rate": 3.781724137931035e-06, "loss": 0.3785, "step": 80700 }, { "epoch": 0.79, "grad_norm": 6.677008152008057, "learning_rate": 3.7812931034482763e-06, "loss": 0.1953, "step": 80725 }, { "epoch": 0.79, "grad_norm": 10.602324485778809, "learning_rate": 3.7808620689655173e-06, "loss": 0.3376, "step": 80750 }, { "epoch": 0.79, "grad_norm": 13.783148765563965, "learning_rate": 3.7804310344827588e-06, "loss": 0.2006, "step": 80775 }, { "epoch": 0.79, "grad_norm": 16.536680221557617, "learning_rate": 3.7800000000000002e-06, "loss": 0.3424, "step": 80800 }, { "epoch": 0.79, "grad_norm": 6.760659217834473, "learning_rate": 3.7795689655172417e-06, "loss": 0.2094, "step": 80825 }, { "epoch": 0.8, "grad_norm": 9.454875946044922, "learning_rate": 3.779137931034483e-06, "loss": 0.3892, "step": 80850 }, { "epoch": 0.8, "grad_norm": 13.856978416442871, "learning_rate": 3.778706896551724e-06, "loss": 0.2428, "step": 80875 }, { "epoch": 0.8, "grad_norm": 16.56269073486328, "learning_rate": 3.7782758620689656e-06, "loss": 0.3785, "step": 80900 }, { "epoch": 0.8, "grad_norm": 3.7832088470458984, "learning_rate": 3.777844827586207e-06, "loss": 0.2155, "step": 80925 }, { "epoch": 0.8, "grad_norm": 10.811250686645508, "learning_rate": 3.777413793103449e-06, "loss": 0.3857, "step": 80950 }, { "epoch": 0.8, "grad_norm": 13.04773235321045, "learning_rate": 3.7769827586206896e-06, "loss": 0.2631, "step": 80975 }, { "epoch": 0.8, "grad_norm": 15.129091262817383, "learning_rate": 3.7765517241379315e-06, "loss": 0.3775, "step": 81000 }, { "epoch": 0.8, "grad_norm": 8.167068481445312, "learning_rate": 3.776120689655173e-06, "loss": 0.2113, "step": 81025 }, { "epoch": 0.8, "grad_norm": 11.245107650756836, "learning_rate": 3.7756896551724144e-06, "loss": 0.3687, "step": 81050 }, { "epoch": 0.8, "grad_norm": 6.570511817932129, "learning_rate": 3.7752586206896554e-06, "loss": 0.1858, "step": 81075 }, { "epoch": 0.8, "grad_norm": 22.205184936523438, "learning_rate": 3.774827586206897e-06, "loss": 0.3212, "step": 81100 }, { "epoch": 0.8, "grad_norm": 5.846063137054443, "learning_rate": 3.7743965517241383e-06, "loss": 0.2346, "step": 81125 }, { "epoch": 0.8, "grad_norm": 11.78244686126709, "learning_rate": 3.7739655172413798e-06, "loss": 0.3117, "step": 81150 }, { "epoch": 0.8, "grad_norm": 9.410977363586426, "learning_rate": 3.7735344827586212e-06, "loss": 0.2524, "step": 81175 }, { "epoch": 0.8, "grad_norm": 15.438472747802734, "learning_rate": 3.7731034482758623e-06, "loss": 0.3842, "step": 81200 }, { "epoch": 0.8, "grad_norm": 10.262799263000488, "learning_rate": 3.7726724137931037e-06, "loss": 0.2377, "step": 81225 }, { "epoch": 0.8, "grad_norm": 17.160913467407227, "learning_rate": 3.772241379310345e-06, "loss": 0.4359, "step": 81250 }, { "epoch": 0.8, "grad_norm": 10.744318008422852, "learning_rate": 3.7718103448275866e-06, "loss": 0.227, "step": 81275 }, { "epoch": 0.8, "grad_norm": 14.363934516906738, "learning_rate": 3.7713793103448277e-06, "loss": 0.3788, "step": 81300 }, { "epoch": 0.8, "grad_norm": 3.834433078765869, "learning_rate": 3.770948275862069e-06, "loss": 0.2452, "step": 81325 }, { "epoch": 0.8, "grad_norm": 12.014893531799316, "learning_rate": 3.7705172413793106e-06, "loss": 0.4104, "step": 81350 }, { "epoch": 0.8, "grad_norm": 5.509968280792236, "learning_rate": 3.770086206896552e-06, "loss": 0.2123, "step": 81375 }, { "epoch": 0.8, "grad_norm": 15.82555866241455, "learning_rate": 3.7696551724137935e-06, "loss": 0.4297, "step": 81400 }, { "epoch": 0.8, "grad_norm": 8.417984962463379, "learning_rate": 3.7692241379310345e-06, "loss": 0.1825, "step": 81425 }, { "epoch": 0.8, "grad_norm": 16.704851150512695, "learning_rate": 3.768793103448276e-06, "loss": 0.3467, "step": 81450 }, { "epoch": 0.8, "grad_norm": 10.071759223937988, "learning_rate": 3.7683620689655174e-06, "loss": 0.208, "step": 81475 }, { "epoch": 0.8, "grad_norm": 18.812530517578125, "learning_rate": 3.767931034482759e-06, "loss": 0.407, "step": 81500 }, { "epoch": 0.8, "grad_norm": 6.064326286315918, "learning_rate": 3.7675e-06, "loss": 0.1807, "step": 81525 }, { "epoch": 0.8, "grad_norm": 16.17791748046875, "learning_rate": 3.7670689655172414e-06, "loss": 0.3292, "step": 81550 }, { "epoch": 0.8, "grad_norm": 12.641715049743652, "learning_rate": 3.7666379310344833e-06, "loss": 0.2263, "step": 81575 }, { "epoch": 0.8, "grad_norm": 20.443761825561523, "learning_rate": 3.7662068965517247e-06, "loss": 0.3724, "step": 81600 }, { "epoch": 0.8, "grad_norm": 7.360668182373047, "learning_rate": 3.765775862068966e-06, "loss": 0.2074, "step": 81625 }, { "epoch": 0.8, "grad_norm": 11.972588539123535, "learning_rate": 3.765344827586207e-06, "loss": 0.448, "step": 81650 }, { "epoch": 0.8, "grad_norm": 14.594918251037598, "learning_rate": 3.7649137931034487e-06, "loss": 0.2491, "step": 81675 }, { "epoch": 0.8, "grad_norm": 19.017763137817383, "learning_rate": 3.76448275862069e-06, "loss": 0.3991, "step": 81700 }, { "epoch": 0.8, "grad_norm": 5.549394607543945, "learning_rate": 3.7640517241379316e-06, "loss": 0.219, "step": 81725 }, { "epoch": 0.8, "grad_norm": 17.273157119750977, "learning_rate": 3.7636206896551726e-06, "loss": 0.3341, "step": 81750 }, { "epoch": 0.8, "grad_norm": 8.224653244018555, "learning_rate": 3.763189655172414e-06, "loss": 0.2128, "step": 81775 }, { "epoch": 0.8, "grad_norm": 11.9697265625, "learning_rate": 3.7627586206896555e-06, "loss": 0.3139, "step": 81800 }, { "epoch": 0.8, "grad_norm": 4.7354936599731445, "learning_rate": 3.762327586206897e-06, "loss": 0.2457, "step": 81825 }, { "epoch": 0.81, "grad_norm": 17.31676483154297, "learning_rate": 3.7618965517241384e-06, "loss": 0.3798, "step": 81850 }, { "epoch": 0.81, "grad_norm": 10.486529350280762, "learning_rate": 3.7614655172413795e-06, "loss": 0.2266, "step": 81875 }, { "epoch": 0.81, "grad_norm": 15.715156555175781, "learning_rate": 3.761034482758621e-06, "loss": 0.3617, "step": 81900 }, { "epoch": 0.81, "grad_norm": 10.5025053024292, "learning_rate": 3.7606034482758624e-06, "loss": 0.1694, "step": 81925 }, { "epoch": 0.81, "grad_norm": 15.828513145446777, "learning_rate": 3.760172413793104e-06, "loss": 0.3552, "step": 81950 }, { "epoch": 0.81, "grad_norm": 6.490579128265381, "learning_rate": 3.759741379310345e-06, "loss": 0.1914, "step": 81975 }, { "epoch": 0.81, "grad_norm": 16.14114761352539, "learning_rate": 3.7593103448275863e-06, "loss": 0.3724, "step": 82000 }, { "epoch": 0.81, "grad_norm": 11.295228004455566, "learning_rate": 3.7588793103448278e-06, "loss": 0.2643, "step": 82025 }, { "epoch": 0.81, "grad_norm": 11.203154563903809, "learning_rate": 3.7584482758620692e-06, "loss": 0.3832, "step": 82050 }, { "epoch": 0.81, "grad_norm": 10.047567367553711, "learning_rate": 3.7580172413793107e-06, "loss": 0.2345, "step": 82075 }, { "epoch": 0.81, "grad_norm": 18.868724822998047, "learning_rate": 3.7575862068965517e-06, "loss": 0.4966, "step": 82100 }, { "epoch": 0.81, "grad_norm": 9.52897834777832, "learning_rate": 3.757155172413793e-06, "loss": 0.1889, "step": 82125 }, { "epoch": 0.81, "grad_norm": 13.897953987121582, "learning_rate": 3.7567241379310346e-06, "loss": 0.3146, "step": 82150 }, { "epoch": 0.81, "grad_norm": 6.417572975158691, "learning_rate": 3.7562931034482765e-06, "loss": 0.2426, "step": 82175 }, { "epoch": 0.81, "grad_norm": 16.478464126586914, "learning_rate": 3.755862068965517e-06, "loss": 0.3861, "step": 82200 }, { "epoch": 0.81, "grad_norm": 5.781115531921387, "learning_rate": 3.755431034482759e-06, "loss": 0.2336, "step": 82225 }, { "epoch": 0.81, "grad_norm": 13.953618049621582, "learning_rate": 3.7550172413793105e-06, "loss": 0.3672, "step": 82250 }, { "epoch": 0.81, "grad_norm": 7.070902347564697, "learning_rate": 3.7545862068965524e-06, "loss": 0.1974, "step": 82275 }, { "epoch": 0.81, "grad_norm": 14.276594161987305, "learning_rate": 3.754155172413794e-06, "loss": 0.3917, "step": 82300 }, { "epoch": 0.81, "grad_norm": 14.68715763092041, "learning_rate": 3.7537241379310344e-06, "loss": 0.1911, "step": 82325 }, { "epoch": 0.81, "grad_norm": 18.48189926147461, "learning_rate": 3.7532931034482763e-06, "loss": 0.4166, "step": 82350 }, { "epoch": 0.81, "grad_norm": 4.33799409866333, "learning_rate": 3.7528620689655178e-06, "loss": 0.2267, "step": 82375 }, { "epoch": 0.81, "grad_norm": 20.837692260742188, "learning_rate": 3.7524310344827592e-06, "loss": 0.3628, "step": 82400 }, { "epoch": 0.81, "grad_norm": 17.58894920349121, "learning_rate": 3.7520000000000002e-06, "loss": 0.1732, "step": 82425 }, { "epoch": 0.81, "grad_norm": 11.340507507324219, "learning_rate": 3.7515689655172417e-06, "loss": 0.3662, "step": 82450 }, { "epoch": 0.81, "grad_norm": 12.027047157287598, "learning_rate": 3.751137931034483e-06, "loss": 0.2117, "step": 82475 }, { "epoch": 0.81, "grad_norm": 15.178215026855469, "learning_rate": 3.7507068965517246e-06, "loss": 0.3231, "step": 82500 }, { "epoch": 0.81, "grad_norm": 7.835970401763916, "learning_rate": 3.750275862068966e-06, "loss": 0.2274, "step": 82525 }, { "epoch": 0.81, "grad_norm": 19.866134643554688, "learning_rate": 3.749844827586207e-06, "loss": 0.41, "step": 82550 }, { "epoch": 0.81, "grad_norm": 8.698101997375488, "learning_rate": 3.7494137931034486e-06, "loss": 0.2029, "step": 82575 }, { "epoch": 0.81, "grad_norm": 15.742383003234863, "learning_rate": 3.74898275862069e-06, "loss": 0.3825, "step": 82600 }, { "epoch": 0.81, "grad_norm": 7.676470756530762, "learning_rate": 3.7485517241379315e-06, "loss": 0.2396, "step": 82625 }, { "epoch": 0.81, "grad_norm": 10.119482040405273, "learning_rate": 3.7481206896551725e-06, "loss": 0.4493, "step": 82650 }, { "epoch": 0.81, "grad_norm": 13.164438247680664, "learning_rate": 3.747689655172414e-06, "loss": 0.1668, "step": 82675 }, { "epoch": 0.81, "grad_norm": 21.9153995513916, "learning_rate": 3.7472586206896554e-06, "loss": 0.3318, "step": 82700 }, { "epoch": 0.81, "grad_norm": 9.953998565673828, "learning_rate": 3.746827586206897e-06, "loss": 0.2312, "step": 82725 }, { "epoch": 0.81, "grad_norm": 11.033534049987793, "learning_rate": 3.746396551724138e-06, "loss": 0.3997, "step": 82750 }, { "epoch": 0.81, "grad_norm": 9.23721694946289, "learning_rate": 3.7459655172413794e-06, "loss": 0.2096, "step": 82775 }, { "epoch": 0.81, "grad_norm": 19.71874237060547, "learning_rate": 3.745534482758621e-06, "loss": 0.4137, "step": 82800 }, { "epoch": 0.81, "grad_norm": 8.123889923095703, "learning_rate": 3.7451034482758623e-06, "loss": 0.2316, "step": 82825 }, { "epoch": 0.81, "grad_norm": 18.728809356689453, "learning_rate": 3.744672413793104e-06, "loss": 0.4402, "step": 82850 }, { "epoch": 0.82, "grad_norm": 6.867141246795654, "learning_rate": 3.7442413793103448e-06, "loss": 0.1825, "step": 82875 }, { "epoch": 0.82, "grad_norm": 16.32155990600586, "learning_rate": 3.7438103448275862e-06, "loss": 0.3596, "step": 82900 }, { "epoch": 0.82, "grad_norm": 9.988985061645508, "learning_rate": 3.743379310344828e-06, "loss": 0.1677, "step": 82925 }, { "epoch": 0.82, "grad_norm": 20.862834930419922, "learning_rate": 3.7429482758620696e-06, "loss": 0.3944, "step": 82950 }, { "epoch": 0.82, "grad_norm": 9.39453125, "learning_rate": 3.7425172413793106e-06, "loss": 0.2003, "step": 82975 }, { "epoch": 0.82, "grad_norm": 16.261919021606445, "learning_rate": 3.742086206896552e-06, "loss": 0.3255, "step": 83000 }, { "epoch": 0.82, "grad_norm": 6.902100086212158, "learning_rate": 3.7416551724137935e-06, "loss": 0.2001, "step": 83025 }, { "epoch": 0.82, "grad_norm": 13.411689758300781, "learning_rate": 3.741224137931035e-06, "loss": 0.3555, "step": 83050 }, { "epoch": 0.82, "grad_norm": 7.7809014320373535, "learning_rate": 3.7407931034482764e-06, "loss": 0.217, "step": 83075 }, { "epoch": 0.82, "grad_norm": 17.19647789001465, "learning_rate": 3.7403620689655175e-06, "loss": 0.3739, "step": 83100 }, { "epoch": 0.82, "grad_norm": 6.395444393157959, "learning_rate": 3.739931034482759e-06, "loss": 0.2271, "step": 83125 }, { "epoch": 0.82, "grad_norm": 17.08983612060547, "learning_rate": 3.7395000000000004e-06, "loss": 0.4059, "step": 83150 }, { "epoch": 0.82, "grad_norm": 12.948412895202637, "learning_rate": 3.739068965517242e-06, "loss": 0.186, "step": 83175 }, { "epoch": 0.82, "grad_norm": 20.68878746032715, "learning_rate": 3.738637931034483e-06, "loss": 0.4493, "step": 83200 }, { "epoch": 0.82, "grad_norm": 7.506966590881348, "learning_rate": 3.7382068965517243e-06, "loss": 0.1717, "step": 83225 }, { "epoch": 0.82, "grad_norm": 20.083837509155273, "learning_rate": 3.7377758620689658e-06, "loss": 0.2965, "step": 83250 }, { "epoch": 0.82, "grad_norm": 8.80933666229248, "learning_rate": 3.7373448275862072e-06, "loss": 0.2207, "step": 83275 }, { "epoch": 0.82, "grad_norm": 16.94013214111328, "learning_rate": 3.7369137931034487e-06, "loss": 0.3325, "step": 83300 }, { "epoch": 0.82, "grad_norm": 8.45557975769043, "learning_rate": 3.7364827586206897e-06, "loss": 0.1701, "step": 83325 }, { "epoch": 0.82, "grad_norm": 16.518033981323242, "learning_rate": 3.736051724137931e-06, "loss": 0.3401, "step": 83350 }, { "epoch": 0.82, "grad_norm": 10.94876766204834, "learning_rate": 3.7356206896551726e-06, "loss": 0.2304, "step": 83375 }, { "epoch": 0.82, "grad_norm": 15.944127082824707, "learning_rate": 3.735189655172414e-06, "loss": 0.362, "step": 83400 }, { "epoch": 0.82, "grad_norm": 8.331757545471191, "learning_rate": 3.734758620689655e-06, "loss": 0.2391, "step": 83425 }, { "epoch": 0.82, "grad_norm": 12.49602222442627, "learning_rate": 3.7343275862068966e-06, "loss": 0.3496, "step": 83450 }, { "epoch": 0.82, "grad_norm": 7.4324564933776855, "learning_rate": 3.733896551724138e-06, "loss": 0.2313, "step": 83475 }, { "epoch": 0.82, "grad_norm": 15.735597610473633, "learning_rate": 3.73346551724138e-06, "loss": 0.3991, "step": 83500 }, { "epoch": 0.82, "grad_norm": 9.752203941345215, "learning_rate": 3.7330344827586214e-06, "loss": 0.1857, "step": 83525 }, { "epoch": 0.82, "grad_norm": 14.160383224487305, "learning_rate": 3.732603448275862e-06, "loss": 0.3686, "step": 83550 }, { "epoch": 0.82, "grad_norm": 7.961763858795166, "learning_rate": 3.732172413793104e-06, "loss": 0.2313, "step": 83575 }, { "epoch": 0.82, "grad_norm": 5.360998153686523, "learning_rate": 3.7317413793103453e-06, "loss": 0.3649, "step": 83600 }, { "epoch": 0.82, "grad_norm": 12.191195487976074, "learning_rate": 3.7313103448275868e-06, "loss": 0.239, "step": 83625 }, { "epoch": 0.82, "grad_norm": 20.85214614868164, "learning_rate": 3.730879310344828e-06, "loss": 0.388, "step": 83650 }, { "epoch": 0.82, "grad_norm": 46.53024673461914, "learning_rate": 3.7304482758620693e-06, "loss": 0.2172, "step": 83675 }, { "epoch": 0.82, "grad_norm": 14.957637786865234, "learning_rate": 3.7300172413793107e-06, "loss": 0.3548, "step": 83700 }, { "epoch": 0.82, "grad_norm": 7.92728853225708, "learning_rate": 3.729586206896552e-06, "loss": 0.2227, "step": 83725 }, { "epoch": 0.82, "grad_norm": 14.062910079956055, "learning_rate": 3.7291551724137936e-06, "loss": 0.3855, "step": 83750 }, { "epoch": 0.82, "grad_norm": 3.5761709213256836, "learning_rate": 3.7287241379310347e-06, "loss": 0.2076, "step": 83775 }, { "epoch": 0.82, "grad_norm": 11.64234447479248, "learning_rate": 3.728293103448276e-06, "loss": 0.415, "step": 83800 }, { "epoch": 0.82, "grad_norm": 2.7859508991241455, "learning_rate": 3.7278620689655176e-06, "loss": 0.2064, "step": 83825 }, { "epoch": 0.82, "grad_norm": 16.813888549804688, "learning_rate": 3.727431034482759e-06, "loss": 0.3422, "step": 83850 }, { "epoch": 0.82, "grad_norm": 5.980964183807373, "learning_rate": 3.727e-06, "loss": 0.2002, "step": 83875 }, { "epoch": 0.83, "grad_norm": 17.003068923950195, "learning_rate": 3.7265689655172415e-06, "loss": 0.3798, "step": 83900 }, { "epoch": 0.83, "grad_norm": 17.197561264038086, "learning_rate": 3.726137931034483e-06, "loss": 0.2233, "step": 83925 }, { "epoch": 0.83, "grad_norm": 15.755708694458008, "learning_rate": 3.7257068965517244e-06, "loss": 0.3894, "step": 83950 }, { "epoch": 0.83, "grad_norm": 8.598731994628906, "learning_rate": 3.725275862068966e-06, "loss": 0.2069, "step": 83975 }, { "epoch": 0.83, "grad_norm": 16.165172576904297, "learning_rate": 3.724844827586207e-06, "loss": 0.3737, "step": 84000 }, { "epoch": 0.83, "grad_norm": 6.5745391845703125, "learning_rate": 3.7244137931034484e-06, "loss": 0.2112, "step": 84025 }, { "epoch": 0.83, "grad_norm": 17.953969955444336, "learning_rate": 3.72398275862069e-06, "loss": 0.3577, "step": 84050 }, { "epoch": 0.83, "grad_norm": 6.679604530334473, "learning_rate": 3.7235517241379317e-06, "loss": 0.2235, "step": 84075 }, { "epoch": 0.83, "grad_norm": 20.126007080078125, "learning_rate": 3.7231206896551723e-06, "loss": 0.4004, "step": 84100 }, { "epoch": 0.83, "grad_norm": 10.847455024719238, "learning_rate": 3.7226896551724138e-06, "loss": 0.2142, "step": 84125 }, { "epoch": 0.83, "grad_norm": 18.20785140991211, "learning_rate": 3.7222586206896557e-06, "loss": 0.3726, "step": 84150 }, { "epoch": 0.83, "grad_norm": 10.54376220703125, "learning_rate": 3.721827586206897e-06, "loss": 0.2257, "step": 84175 }, { "epoch": 0.83, "grad_norm": 16.55872917175293, "learning_rate": 3.7213965517241386e-06, "loss": 0.3384, "step": 84200 }, { "epoch": 0.83, "grad_norm": 7.76241397857666, "learning_rate": 3.7209655172413796e-06, "loss": 0.1907, "step": 84225 }, { "epoch": 0.83, "grad_norm": Infinity, "learning_rate": 3.7205517241379315e-06, "loss": 0.3464, "step": 84250 }, { "epoch": 0.83, "grad_norm": 9.23720932006836, "learning_rate": 3.720120689655173e-06, "loss": 0.1986, "step": 84275 }, { "epoch": 0.83, "grad_norm": 22.519506454467773, "learning_rate": 3.7196896551724144e-06, "loss": 0.3202, "step": 84300 }, { "epoch": 0.83, "grad_norm": 7.232170104980469, "learning_rate": 3.7192586206896554e-06, "loss": 0.2128, "step": 84325 }, { "epoch": 0.83, "grad_norm": 21.056293487548828, "learning_rate": 3.718827586206897e-06, "loss": 0.3715, "step": 84350 }, { "epoch": 0.83, "grad_norm": 13.881552696228027, "learning_rate": 3.7183965517241383e-06, "loss": 0.2573, "step": 84375 }, { "epoch": 0.83, "grad_norm": 17.807842254638672, "learning_rate": 3.71796551724138e-06, "loss": 0.379, "step": 84400 }, { "epoch": 0.83, "grad_norm": 9.75545597076416, "learning_rate": 3.717534482758621e-06, "loss": 0.2399, "step": 84425 }, { "epoch": 0.83, "grad_norm": 16.970256805419922, "learning_rate": 3.7171034482758623e-06, "loss": 0.4204, "step": 84450 }, { "epoch": 0.83, "grad_norm": 10.22221851348877, "learning_rate": 3.7166724137931037e-06, "loss": 0.1846, "step": 84475 }, { "epoch": 0.83, "grad_norm": 22.13064956665039, "learning_rate": 3.716241379310345e-06, "loss": 0.4513, "step": 84500 }, { "epoch": 0.83, "grad_norm": 9.029796600341797, "learning_rate": 3.7158103448275867e-06, "loss": 0.2192, "step": 84525 }, { "epoch": 0.83, "grad_norm": 12.839634895324707, "learning_rate": 3.7153793103448277e-06, "loss": 0.3426, "step": 84550 }, { "epoch": 0.83, "grad_norm": 7.57751989364624, "learning_rate": 3.714948275862069e-06, "loss": 0.2102, "step": 84575 }, { "epoch": 0.83, "grad_norm": 21.75100326538086, "learning_rate": 3.7145172413793106e-06, "loss": 0.3794, "step": 84600 }, { "epoch": 0.83, "grad_norm": 16.36473846435547, "learning_rate": 3.714086206896552e-06, "loss": 0.2068, "step": 84625 }, { "epoch": 0.83, "grad_norm": 6.704276084899902, "learning_rate": 3.713655172413793e-06, "loss": 0.3851, "step": 84650 }, { "epoch": 0.83, "grad_norm": 8.932334899902344, "learning_rate": 3.7132241379310346e-06, "loss": 0.2208, "step": 84675 }, { "epoch": 0.83, "grad_norm": 19.635385513305664, "learning_rate": 3.712793103448276e-06, "loss": 0.3847, "step": 84700 }, { "epoch": 0.83, "grad_norm": 11.136969566345215, "learning_rate": 3.7123620689655175e-06, "loss": 0.2235, "step": 84725 }, { "epoch": 0.83, "grad_norm": 17.673311233520508, "learning_rate": 3.7119310344827593e-06, "loss": 0.3758, "step": 84750 }, { "epoch": 0.83, "grad_norm": 13.729745864868164, "learning_rate": 3.7115e-06, "loss": 0.2053, "step": 84775 }, { "epoch": 0.83, "grad_norm": 19.0612850189209, "learning_rate": 3.7110689655172414e-06, "loss": 0.4039, "step": 84800 }, { "epoch": 0.83, "grad_norm": 5.27508544921875, "learning_rate": 3.7106379310344833e-06, "loss": 0.2104, "step": 84825 }, { "epoch": 0.83, "grad_norm": 22.18791961669922, "learning_rate": 3.7102068965517247e-06, "loss": 0.3977, "step": 84850 }, { "epoch": 0.83, "grad_norm": 6.390852928161621, "learning_rate": 3.7097758620689654e-06, "loss": 0.1882, "step": 84875 }, { "epoch": 0.84, "grad_norm": 9.501365661621094, "learning_rate": 3.7093448275862072e-06, "loss": 0.2753, "step": 84900 }, { "epoch": 0.84, "grad_norm": 5.578997611999512, "learning_rate": 3.7089137931034487e-06, "loss": 0.1931, "step": 84925 }, { "epoch": 0.84, "grad_norm": 16.92791748046875, "learning_rate": 3.70848275862069e-06, "loss": 0.3409, "step": 84950 }, { "epoch": 0.84, "grad_norm": 6.700046539306641, "learning_rate": 3.7080517241379316e-06, "loss": 0.2089, "step": 84975 }, { "epoch": 0.84, "grad_norm": 21.85805892944336, "learning_rate": 3.7076206896551726e-06, "loss": 0.401, "step": 85000 }, { "epoch": 0.84, "grad_norm": 13.1927490234375, "learning_rate": 3.707189655172414e-06, "loss": 0.2076, "step": 85025 }, { "epoch": 0.84, "grad_norm": 20.5313777923584, "learning_rate": 3.7067586206896556e-06, "loss": 0.4186, "step": 85050 }, { "epoch": 0.84, "grad_norm": 15.213386535644531, "learning_rate": 3.706327586206897e-06, "loss": 0.2233, "step": 85075 }, { "epoch": 0.84, "grad_norm": 18.423900604248047, "learning_rate": 3.705896551724138e-06, "loss": 0.4183, "step": 85100 }, { "epoch": 0.84, "grad_norm": 6.951155185699463, "learning_rate": 3.7054655172413795e-06, "loss": 0.1827, "step": 85125 }, { "epoch": 0.84, "grad_norm": 19.937376022338867, "learning_rate": 3.705034482758621e-06, "loss": 0.3704, "step": 85150 }, { "epoch": 0.84, "grad_norm": 6.45441198348999, "learning_rate": 3.7046034482758624e-06, "loss": 0.1711, "step": 85175 }, { "epoch": 0.84, "grad_norm": 16.999549865722656, "learning_rate": 3.704172413793104e-06, "loss": 0.3567, "step": 85200 }, { "epoch": 0.84, "grad_norm": 12.582741737365723, "learning_rate": 3.703741379310345e-06, "loss": 0.2186, "step": 85225 }, { "epoch": 0.84, "grad_norm": 15.587937355041504, "learning_rate": 3.7033103448275864e-06, "loss": 0.3383, "step": 85250 }, { "epoch": 0.84, "grad_norm": 11.04648208618164, "learning_rate": 3.702879310344828e-06, "loss": 0.2285, "step": 85275 }, { "epoch": 0.84, "grad_norm": 14.66844367980957, "learning_rate": 3.7024482758620693e-06, "loss": 0.3471, "step": 85300 }, { "epoch": 0.84, "grad_norm": 7.567780494689941, "learning_rate": 3.7020172413793103e-06, "loss": 0.2196, "step": 85325 }, { "epoch": 0.84, "grad_norm": 10.111343383789062, "learning_rate": 3.7015862068965518e-06, "loss": 0.4105, "step": 85350 }, { "epoch": 0.84, "grad_norm": 16.267805099487305, "learning_rate": 3.7011551724137932e-06, "loss": 0.2346, "step": 85375 }, { "epoch": 0.84, "grad_norm": 17.489933013916016, "learning_rate": 3.700724137931035e-06, "loss": 0.3789, "step": 85400 }, { "epoch": 0.84, "grad_norm": 12.330724716186523, "learning_rate": 3.7002931034482766e-06, "loss": 0.2037, "step": 85425 }, { "epoch": 0.84, "grad_norm": 13.695712089538574, "learning_rate": 3.699862068965517e-06, "loss": 0.46, "step": 85450 }, { "epoch": 0.84, "grad_norm": 11.578132629394531, "learning_rate": 3.699431034482759e-06, "loss": 0.209, "step": 85475 }, { "epoch": 0.84, "grad_norm": 19.980506896972656, "learning_rate": 3.6990000000000005e-06, "loss": 0.395, "step": 85500 }, { "epoch": 0.84, "grad_norm": 7.461217403411865, "learning_rate": 3.698568965517242e-06, "loss": 0.2015, "step": 85525 }, { "epoch": 0.84, "grad_norm": 22.54290199279785, "learning_rate": 3.698137931034483e-06, "loss": 0.3565, "step": 85550 }, { "epoch": 0.84, "grad_norm": 8.12817668914795, "learning_rate": 3.6977068965517244e-06, "loss": 0.2329, "step": 85575 }, { "epoch": 0.84, "grad_norm": 15.890487670898438, "learning_rate": 3.697275862068966e-06, "loss": 0.3703, "step": 85600 }, { "epoch": 0.84, "grad_norm": 6.879053592681885, "learning_rate": 3.6968448275862074e-06, "loss": 0.2051, "step": 85625 }, { "epoch": 0.84, "grad_norm": 12.512064933776855, "learning_rate": 3.696413793103449e-06, "loss": 0.3439, "step": 85650 }, { "epoch": 0.84, "grad_norm": 7.829655647277832, "learning_rate": 3.69598275862069e-06, "loss": 0.2266, "step": 85675 }, { "epoch": 0.84, "grad_norm": 20.99164390563965, "learning_rate": 3.6955517241379313e-06, "loss": 0.393, "step": 85700 }, { "epoch": 0.84, "grad_norm": 9.317792892456055, "learning_rate": 3.6951206896551728e-06, "loss": 0.2326, "step": 85725 }, { "epoch": 0.84, "grad_norm": 11.04901123046875, "learning_rate": 3.6946896551724142e-06, "loss": 0.3557, "step": 85750 }, { "epoch": 0.84, "grad_norm": 10.112868309020996, "learning_rate": 3.6942586206896552e-06, "loss": 0.2, "step": 85775 }, { "epoch": 0.84, "grad_norm": 11.330543518066406, "learning_rate": 3.6938275862068967e-06, "loss": 0.3371, "step": 85800 }, { "epoch": 0.84, "grad_norm": 13.157329559326172, "learning_rate": 3.693396551724138e-06, "loss": 0.2358, "step": 85825 }, { "epoch": 0.84, "grad_norm": 12.000883102416992, "learning_rate": 3.6929655172413796e-06, "loss": 0.3693, "step": 85850 }, { "epoch": 0.84, "grad_norm": 9.074806213378906, "learning_rate": 3.692534482758621e-06, "loss": 0.1936, "step": 85875 }, { "epoch": 0.84, "grad_norm": 24.916484832763672, "learning_rate": 3.692103448275862e-06, "loss": 0.3611, "step": 85900 }, { "epoch": 0.85, "grad_norm": 9.956232070922852, "learning_rate": 3.6916724137931036e-06, "loss": 0.1862, "step": 85925 }, { "epoch": 0.85, "grad_norm": 10.543898582458496, "learning_rate": 3.691241379310345e-06, "loss": 0.3503, "step": 85950 }, { "epoch": 0.85, "grad_norm": 9.157842636108398, "learning_rate": 3.690810344827587e-06, "loss": 0.2413, "step": 85975 }, { "epoch": 0.85, "grad_norm": 13.597996711730957, "learning_rate": 3.6903793103448275e-06, "loss": 0.3575, "step": 86000 }, { "epoch": 0.85, "grad_norm": 5.480571269989014, "learning_rate": 3.689948275862069e-06, "loss": 0.2047, "step": 86025 }, { "epoch": 0.85, "grad_norm": 20.861862182617188, "learning_rate": 3.689517241379311e-06, "loss": 0.3777, "step": 86050 }, { "epoch": 0.85, "grad_norm": 9.161151885986328, "learning_rate": 3.6890862068965523e-06, "loss": 0.2, "step": 86075 }, { "epoch": 0.85, "grad_norm": 18.383073806762695, "learning_rate": 3.6886551724137938e-06, "loss": 0.3404, "step": 86100 }, { "epoch": 0.85, "grad_norm": 6.239759922027588, "learning_rate": 3.688224137931035e-06, "loss": 0.2165, "step": 86125 }, { "epoch": 0.85, "grad_norm": 19.255542755126953, "learning_rate": 3.6877931034482762e-06, "loss": 0.4032, "step": 86150 }, { "epoch": 0.85, "grad_norm": 8.799480438232422, "learning_rate": 3.6873620689655177e-06, "loss": 0.2208, "step": 86175 }, { "epoch": 0.85, "grad_norm": 17.251829147338867, "learning_rate": 3.686931034482759e-06, "loss": 0.3909, "step": 86200 }, { "epoch": 0.85, "grad_norm": 5.685675144195557, "learning_rate": 3.6865e-06, "loss": 0.2158, "step": 86225 }, { "epoch": 0.85, "grad_norm": 16.132593154907227, "learning_rate": 3.6860689655172417e-06, "loss": 0.3634, "step": 86250 }, { "epoch": 0.85, "grad_norm": 7.200938701629639, "learning_rate": 3.685637931034483e-06, "loss": 0.226, "step": 86275 }, { "epoch": 0.85, "grad_norm": 9.148785591125488, "learning_rate": 3.6852068965517246e-06, "loss": 0.3821, "step": 86300 }, { "epoch": 0.85, "grad_norm": 7.728355407714844, "learning_rate": 3.684775862068966e-06, "loss": 0.203, "step": 86325 }, { "epoch": 0.85, "grad_norm": 17.25510025024414, "learning_rate": 3.684344827586207e-06, "loss": 0.4252, "step": 86350 }, { "epoch": 0.85, "grad_norm": 9.603300094604492, "learning_rate": 3.6839137931034485e-06, "loss": 0.1928, "step": 86375 }, { "epoch": 0.85, "grad_norm": Infinity, "learning_rate": 3.6835000000000004e-06, "loss": 0.3774, "step": 86400 }, { "epoch": 0.85, "grad_norm": 4.77321195602417, "learning_rate": 3.683068965517242e-06, "loss": 0.2351, "step": 86425 }, { "epoch": 0.85, "grad_norm": 20.661029815673828, "learning_rate": 3.682637931034483e-06, "loss": 0.3996, "step": 86450 }, { "epoch": 0.85, "grad_norm": 6.373960971832275, "learning_rate": 3.6822068965517243e-06, "loss": 0.2053, "step": 86475 }, { "epoch": 0.85, "grad_norm": 17.26689910888672, "learning_rate": 3.681775862068966e-06, "loss": 0.3875, "step": 86500 }, { "epoch": 0.85, "grad_norm": 7.781322956085205, "learning_rate": 3.6813448275862073e-06, "loss": 0.2086, "step": 86525 }, { "epoch": 0.85, "grad_norm": 24.462221145629883, "learning_rate": 3.6809137931034483e-06, "loss": 0.4326, "step": 86550 }, { "epoch": 0.85, "grad_norm": 8.92834758758545, "learning_rate": 3.6804827586206897e-06, "loss": 0.1742, "step": 86575 }, { "epoch": 0.85, "grad_norm": 18.85892105102539, "learning_rate": 3.680051724137931e-06, "loss": 0.3603, "step": 86600 }, { "epoch": 0.85, "grad_norm": 19.80361557006836, "learning_rate": 3.6796206896551727e-06, "loss": 0.2014, "step": 86625 }, { "epoch": 0.85, "grad_norm": 20.057209014892578, "learning_rate": 3.6791896551724145e-06, "loss": 0.3842, "step": 86650 }, { "epoch": 0.85, "grad_norm": 7.918477535247803, "learning_rate": 3.678758620689655e-06, "loss": 0.2421, "step": 86675 }, { "epoch": 0.85, "grad_norm": 16.510698318481445, "learning_rate": 3.6783275862068966e-06, "loss": 0.3942, "step": 86700 }, { "epoch": 0.85, "grad_norm": 10.30992317199707, "learning_rate": 3.6778965517241385e-06, "loss": 0.2166, "step": 86725 }, { "epoch": 0.85, "grad_norm": 20.311012268066406, "learning_rate": 3.67746551724138e-06, "loss": 0.2954, "step": 86750 }, { "epoch": 0.85, "grad_norm": 12.05093765258789, "learning_rate": 3.6770344827586205e-06, "loss": 0.1852, "step": 86775 }, { "epoch": 0.85, "grad_norm": 17.104740142822266, "learning_rate": 3.6766034482758624e-06, "loss": 0.4232, "step": 86800 }, { "epoch": 0.85, "grad_norm": 12.017961502075195, "learning_rate": 3.676172413793104e-06, "loss": 0.1898, "step": 86825 }, { "epoch": 0.85, "grad_norm": 1.585330843925476, "learning_rate": 3.6757413793103453e-06, "loss": 0.3218, "step": 86850 }, { "epoch": 0.85, "grad_norm": 10.840874671936035, "learning_rate": 3.675310344827587e-06, "loss": 0.2515, "step": 86875 }, { "epoch": 0.85, "grad_norm": 27.896482467651367, "learning_rate": 3.674879310344828e-06, "loss": 0.461, "step": 86900 }, { "epoch": 0.85, "grad_norm": 7.717704772949219, "learning_rate": 3.6744482758620693e-06, "loss": 0.2045, "step": 86925 }, { "epoch": 0.86, "grad_norm": 16.568344116210938, "learning_rate": 3.6740172413793107e-06, "loss": 0.3081, "step": 86950 }, { "epoch": 0.86, "grad_norm": 12.497018814086914, "learning_rate": 3.673586206896552e-06, "loss": 0.2021, "step": 86975 }, { "epoch": 0.86, "grad_norm": 20.697038650512695, "learning_rate": 3.6731551724137932e-06, "loss": 0.387, "step": 87000 }, { "epoch": 0.86, "grad_norm": 8.135305404663086, "learning_rate": 3.6727241379310347e-06, "loss": 0.2329, "step": 87025 }, { "epoch": 0.86, "grad_norm": 11.585744857788086, "learning_rate": 3.672293103448276e-06, "loss": 0.4092, "step": 87050 }, { "epoch": 0.86, "grad_norm": 7.260258674621582, "learning_rate": 3.6718620689655176e-06, "loss": 0.2499, "step": 87075 }, { "epoch": 0.86, "grad_norm": 14.788064002990723, "learning_rate": 3.671431034482759e-06, "loss": 0.3666, "step": 87100 }, { "epoch": 0.86, "grad_norm": 7.343393325805664, "learning_rate": 3.671e-06, "loss": 0.2629, "step": 87125 }, { "epoch": 0.86, "grad_norm": 14.210214614868164, "learning_rate": 3.6705689655172415e-06, "loss": 0.4189, "step": 87150 }, { "epoch": 0.86, "grad_norm": 6.823200702667236, "learning_rate": 3.670137931034483e-06, "loss": 0.2029, "step": 87175 }, { "epoch": 0.86, "grad_norm": 20.08745002746582, "learning_rate": 3.6697068965517245e-06, "loss": 0.3408, "step": 87200 }, { "epoch": 0.86, "grad_norm": 6.493013858795166, "learning_rate": 3.6692758620689655e-06, "loss": 0.2235, "step": 87225 }, { "epoch": 0.86, "grad_norm": 10.252216339111328, "learning_rate": 3.668844827586207e-06, "loss": 0.3377, "step": 87250 }, { "epoch": 0.86, "grad_norm": 10.729330062866211, "learning_rate": 3.6684137931034484e-06, "loss": 0.2203, "step": 87275 }, { "epoch": 0.86, "grad_norm": 19.121843338012695, "learning_rate": 3.6679827586206903e-06, "loss": 0.3266, "step": 87300 }, { "epoch": 0.86, "grad_norm": 15.117217063903809, "learning_rate": 3.6675517241379317e-06, "loss": 0.2536, "step": 87325 }, { "epoch": 0.86, "grad_norm": 14.598043441772461, "learning_rate": 3.6671206896551724e-06, "loss": 0.4293, "step": 87350 }, { "epoch": 0.86, "grad_norm": 8.97378921508789, "learning_rate": 3.6666896551724142e-06, "loss": 0.206, "step": 87375 }, { "epoch": 0.86, "grad_norm": 16.541913986206055, "learning_rate": 3.6662586206896557e-06, "loss": 0.4582, "step": 87400 }, { "epoch": 0.86, "grad_norm": 11.406453132629395, "learning_rate": 3.665827586206897e-06, "loss": 0.2157, "step": 87425 }, { "epoch": 0.86, "grad_norm": 17.523080825805664, "learning_rate": 3.665396551724138e-06, "loss": 0.4001, "step": 87450 }, { "epoch": 0.86, "grad_norm": 10.378917694091797, "learning_rate": 3.6649655172413796e-06, "loss": 0.1995, "step": 87475 }, { "epoch": 0.86, "grad_norm": 12.564750671386719, "learning_rate": 3.664534482758621e-06, "loss": 0.3833, "step": 87500 }, { "epoch": 0.86, "grad_norm": 8.594941139221191, "learning_rate": 3.6641034482758625e-06, "loss": 0.2002, "step": 87525 }, { "epoch": 0.86, "grad_norm": 28.354387283325195, "learning_rate": 3.663672413793104e-06, "loss": 0.4421, "step": 87550 }, { "epoch": 0.86, "grad_norm": 12.700521469116211, "learning_rate": 3.663241379310345e-06, "loss": 0.2483, "step": 87575 }, { "epoch": 0.86, "grad_norm": 22.763427734375, "learning_rate": 3.6628103448275865e-06, "loss": 0.328, "step": 87600 }, { "epoch": 0.86, "grad_norm": 9.680126190185547, "learning_rate": 3.662379310344828e-06, "loss": 0.2211, "step": 87625 }, { "epoch": 0.86, "grad_norm": 20.2335147857666, "learning_rate": 3.6619482758620694e-06, "loss": 0.3314, "step": 87650 }, { "epoch": 0.86, "grad_norm": 3.8915226459503174, "learning_rate": 3.6615172413793104e-06, "loss": 0.1876, "step": 87675 }, { "epoch": 0.86, "grad_norm": 15.591106414794922, "learning_rate": 3.661086206896552e-06, "loss": 0.4136, "step": 87700 }, { "epoch": 0.86, "grad_norm": 13.957134246826172, "learning_rate": 3.6606551724137933e-06, "loss": 0.2266, "step": 87725 }, { "epoch": 0.86, "grad_norm": 17.762563705444336, "learning_rate": 3.660224137931035e-06, "loss": 0.3798, "step": 87750 }, { "epoch": 0.86, "grad_norm": 9.68734073638916, "learning_rate": 3.6597931034482763e-06, "loss": 0.1767, "step": 87775 }, { "epoch": 0.86, "grad_norm": 15.380755424499512, "learning_rate": 3.6593620689655173e-06, "loss": 0.3889, "step": 87800 }, { "epoch": 0.86, "grad_norm": 6.423186302185059, "learning_rate": 3.6589310344827588e-06, "loss": 0.1963, "step": 87825 }, { "epoch": 0.86, "grad_norm": 22.139394760131836, "learning_rate": 3.6585e-06, "loss": 0.4651, "step": 87850 }, { "epoch": 0.86, "grad_norm": 7.277993679046631, "learning_rate": 3.658068965517242e-06, "loss": 0.2098, "step": 87875 }, { "epoch": 0.86, "grad_norm": 13.656731605529785, "learning_rate": 3.6576379310344827e-06, "loss": 0.452, "step": 87900 }, { "epoch": 0.86, "grad_norm": 8.181255340576172, "learning_rate": 3.657206896551724e-06, "loss": 0.2243, "step": 87925 }, { "epoch": 0.87, "grad_norm": 10.41123104095459, "learning_rate": 3.656775862068966e-06, "loss": 0.3137, "step": 87950 }, { "epoch": 0.87, "grad_norm": 13.600153923034668, "learning_rate": 3.6563448275862075e-06, "loss": 0.2595, "step": 87975 }, { "epoch": 0.87, "grad_norm": 17.557823181152344, "learning_rate": 3.655913793103449e-06, "loss": 0.38, "step": 88000 }, { "epoch": 0.87, "grad_norm": 12.888132095336914, "learning_rate": 3.65548275862069e-06, "loss": 0.2644, "step": 88025 }, { "epoch": 0.87, "grad_norm": 8.893601417541504, "learning_rate": 3.6550517241379314e-06, "loss": 0.3618, "step": 88050 }, { "epoch": 0.87, "grad_norm": 5.391237258911133, "learning_rate": 3.654620689655173e-06, "loss": 0.226, "step": 88075 }, { "epoch": 0.87, "grad_norm": 23.847801208496094, "learning_rate": 3.6541896551724143e-06, "loss": 0.3846, "step": 88100 }, { "epoch": 0.87, "grad_norm": 9.932829856872559, "learning_rate": 3.6537586206896554e-06, "loss": 0.22, "step": 88125 }, { "epoch": 0.87, "grad_norm": 11.663823127746582, "learning_rate": 3.653327586206897e-06, "loss": 0.3847, "step": 88150 }, { "epoch": 0.87, "grad_norm": 7.543575286865234, "learning_rate": 3.6528965517241383e-06, "loss": 0.2155, "step": 88175 }, { "epoch": 0.87, "grad_norm": 20.75132179260254, "learning_rate": 3.6524655172413798e-06, "loss": 0.3921, "step": 88200 }, { "epoch": 0.87, "grad_norm": 14.096184730529785, "learning_rate": 3.6520344827586208e-06, "loss": 0.3166, "step": 88225 }, { "epoch": 0.87, "grad_norm": 22.80483627319336, "learning_rate": 3.6516034482758622e-06, "loss": 0.3554, "step": 88250 }, { "epoch": 0.87, "grad_norm": 6.562742710113525, "learning_rate": 3.6511724137931037e-06, "loss": 0.192, "step": 88275 }, { "epoch": 0.87, "grad_norm": 14.285913467407227, "learning_rate": 3.650741379310345e-06, "loss": 0.4175, "step": 88300 }, { "epoch": 0.87, "grad_norm": 9.202223777770996, "learning_rate": 3.6503103448275866e-06, "loss": 0.2263, "step": 88325 }, { "epoch": 0.87, "grad_norm": 13.386361122131348, "learning_rate": 3.6498793103448276e-06, "loss": 0.4197, "step": 88350 }, { "epoch": 0.87, "grad_norm": 8.59383773803711, "learning_rate": 3.649448275862069e-06, "loss": 0.1813, "step": 88375 }, { "epoch": 0.87, "grad_norm": 11.598912239074707, "learning_rate": 3.6490172413793106e-06, "loss": 0.3173, "step": 88400 }, { "epoch": 0.87, "grad_norm": 5.958115100860596, "learning_rate": 3.648586206896552e-06, "loss": 0.2074, "step": 88425 }, { "epoch": 0.87, "grad_norm": 14.826434135437012, "learning_rate": 3.6481724137931035e-06, "loss": 0.3633, "step": 88450 }, { "epoch": 0.87, "grad_norm": 11.19669246673584, "learning_rate": 3.647741379310345e-06, "loss": 0.1974, "step": 88475 }, { "epoch": 0.87, "grad_norm": 16.65646743774414, "learning_rate": 3.6473103448275864e-06, "loss": 0.3454, "step": 88500 }, { "epoch": 0.87, "grad_norm": 9.858088493347168, "learning_rate": 3.646879310344828e-06, "loss": 0.2008, "step": 88525 }, { "epoch": 0.87, "grad_norm": 12.31371021270752, "learning_rate": 3.6464482758620693e-06, "loss": 0.3805, "step": 88550 }, { "epoch": 0.87, "grad_norm": 5.003921985626221, "learning_rate": 3.6460172413793103e-06, "loss": 0.2248, "step": 88575 }, { "epoch": 0.87, "grad_norm": 14.707036018371582, "learning_rate": 3.6455862068965518e-06, "loss": 0.3669, "step": 88600 }, { "epoch": 0.87, "grad_norm": 11.757915496826172, "learning_rate": 3.6451551724137937e-06, "loss": 0.2405, "step": 88625 }, { "epoch": 0.87, "grad_norm": 13.432592391967773, "learning_rate": 3.644724137931035e-06, "loss": 0.3253, "step": 88650 }, { "epoch": 0.87, "grad_norm": 9.552202224731445, "learning_rate": 3.6442931034482757e-06, "loss": 0.171, "step": 88675 }, { "epoch": 0.87, "grad_norm": 7.471747398376465, "learning_rate": 3.6438620689655176e-06, "loss": 0.34, "step": 88700 }, { "epoch": 0.87, "grad_norm": 3.924924850463867, "learning_rate": 3.643431034482759e-06, "loss": 0.2007, "step": 88725 }, { "epoch": 0.87, "grad_norm": 14.969651222229004, "learning_rate": 3.6430000000000005e-06, "loss": 0.445, "step": 88750 }, { "epoch": 0.87, "grad_norm": 15.910080909729004, "learning_rate": 3.642568965517242e-06, "loss": 0.2129, "step": 88775 }, { "epoch": 0.87, "grad_norm": 17.410444259643555, "learning_rate": 3.642137931034483e-06, "loss": 0.3463, "step": 88800 }, { "epoch": 0.87, "grad_norm": 9.335878372192383, "learning_rate": 3.6417068965517245e-06, "loss": 0.2178, "step": 88825 }, { "epoch": 0.87, "grad_norm": 25.821056365966797, "learning_rate": 3.641275862068966e-06, "loss": 0.3839, "step": 88850 }, { "epoch": 0.87, "grad_norm": 10.234067916870117, "learning_rate": 3.6408448275862074e-06, "loss": 0.1884, "step": 88875 }, { "epoch": 0.87, "grad_norm": 13.642786979675293, "learning_rate": 3.6404137931034484e-06, "loss": 0.396, "step": 88900 }, { "epoch": 0.87, "grad_norm": 10.522789001464844, "learning_rate": 3.63998275862069e-06, "loss": 0.1697, "step": 88925 }, { "epoch": 0.87, "grad_norm": 22.30982780456543, "learning_rate": 3.6395517241379313e-06, "loss": 0.3668, "step": 88950 }, { "epoch": 0.88, "grad_norm": 9.788476943969727, "learning_rate": 3.6391206896551728e-06, "loss": 0.1935, "step": 88975 }, { "epoch": 0.88, "grad_norm": 15.854048728942871, "learning_rate": 3.6386896551724142e-06, "loss": 0.3461, "step": 89000 }, { "epoch": 0.88, "grad_norm": 8.214479446411133, "learning_rate": 3.6382586206896553e-06, "loss": 0.2084, "step": 89025 }, { "epoch": 0.88, "grad_norm": 14.463886260986328, "learning_rate": 3.6378275862068967e-06, "loss": 0.3565, "step": 89050 }, { "epoch": 0.88, "grad_norm": 8.711833000183105, "learning_rate": 3.637396551724138e-06, "loss": 0.2184, "step": 89075 }, { "epoch": 0.88, "grad_norm": 22.236663818359375, "learning_rate": 3.6369655172413796e-06, "loss": 0.302, "step": 89100 }, { "epoch": 0.88, "grad_norm": 9.750212669372559, "learning_rate": 3.6365344827586207e-06, "loss": 0.2087, "step": 89125 }, { "epoch": 0.88, "grad_norm": 20.048105239868164, "learning_rate": 3.636103448275862e-06, "loss": 0.4225, "step": 89150 }, { "epoch": 0.88, "grad_norm": 17.56796646118164, "learning_rate": 3.6356724137931036e-06, "loss": 0.2011, "step": 89175 }, { "epoch": 0.88, "grad_norm": 28.582231521606445, "learning_rate": 3.6352413793103455e-06, "loss": 0.4242, "step": 89200 }, { "epoch": 0.88, "grad_norm": 4.724441051483154, "learning_rate": 3.634810344827587e-06, "loss": 0.2166, "step": 89225 }, { "epoch": 0.88, "grad_norm": 14.3809232711792, "learning_rate": 3.6343793103448275e-06, "loss": 0.3823, "step": 89250 }, { "epoch": 0.88, "grad_norm": 10.635173797607422, "learning_rate": 3.6339482758620694e-06, "loss": 0.2296, "step": 89275 }, { "epoch": 0.88, "grad_norm": 22.58611297607422, "learning_rate": 3.633517241379311e-06, "loss": 0.3612, "step": 89300 }, { "epoch": 0.88, "grad_norm": 8.093688011169434, "learning_rate": 3.6330862068965523e-06, "loss": 0.2235, "step": 89325 }, { "epoch": 0.88, "grad_norm": 24.45104217529297, "learning_rate": 3.6326551724137934e-06, "loss": 0.3542, "step": 89350 }, { "epoch": 0.88, "grad_norm": 5.889479637145996, "learning_rate": 3.632224137931035e-06, "loss": 0.1749, "step": 89375 }, { "epoch": 0.88, "grad_norm": 21.324077606201172, "learning_rate": 3.6317931034482763e-06, "loss": 0.3348, "step": 89400 }, { "epoch": 0.88, "grad_norm": 16.279016494750977, "learning_rate": 3.6313620689655177e-06, "loss": 0.1927, "step": 89425 }, { "epoch": 0.88, "grad_norm": 17.562700271606445, "learning_rate": 3.630931034482759e-06, "loss": 0.3245, "step": 89450 }, { "epoch": 0.88, "grad_norm": 13.034242630004883, "learning_rate": 3.6305000000000002e-06, "loss": 0.2172, "step": 89475 }, { "epoch": 0.88, "grad_norm": 10.733375549316406, "learning_rate": 3.6300689655172417e-06, "loss": 0.4015, "step": 89500 }, { "epoch": 0.88, "grad_norm": 10.697904586791992, "learning_rate": 3.629637931034483e-06, "loss": 0.198, "step": 89525 }, { "epoch": 0.88, "grad_norm": 14.775642395019531, "learning_rate": 3.6292068965517246e-06, "loss": 0.342, "step": 89550 }, { "epoch": 0.88, "grad_norm": 11.180989265441895, "learning_rate": 3.6287758620689656e-06, "loss": 0.2049, "step": 89575 }, { "epoch": 0.88, "grad_norm": 18.539583206176758, "learning_rate": 3.628344827586207e-06, "loss": 0.3342, "step": 89600 }, { "epoch": 0.88, "grad_norm": 10.417204856872559, "learning_rate": 3.6279137931034485e-06, "loss": 0.2071, "step": 89625 }, { "epoch": 0.88, "grad_norm": 8.008028984069824, "learning_rate": 3.62748275862069e-06, "loss": 0.3492, "step": 89650 }, { "epoch": 0.88, "grad_norm": 9.514058113098145, "learning_rate": 3.6270517241379315e-06, "loss": 0.2091, "step": 89675 }, { "epoch": 0.88, "grad_norm": 19.20466423034668, "learning_rate": 3.6266206896551725e-06, "loss": 0.358, "step": 89700 }, { "epoch": 0.88, "grad_norm": 8.947748184204102, "learning_rate": 3.626189655172414e-06, "loss": 0.1761, "step": 89725 }, { "epoch": 0.88, "grad_norm": 17.070451736450195, "learning_rate": 3.6257586206896554e-06, "loss": 0.4095, "step": 89750 }, { "epoch": 0.88, "grad_norm": 7.408143043518066, "learning_rate": 3.625327586206897e-06, "loss": 0.2221, "step": 89775 }, { "epoch": 0.88, "grad_norm": 17.057024002075195, "learning_rate": 3.624896551724138e-06, "loss": 0.3598, "step": 89800 }, { "epoch": 0.88, "grad_norm": 4.806582927703857, "learning_rate": 3.6244655172413793e-06, "loss": 0.2303, "step": 89825 }, { "epoch": 0.88, "grad_norm": 13.322532653808594, "learning_rate": 3.6240344827586212e-06, "loss": 0.3468, "step": 89850 }, { "epoch": 0.88, "grad_norm": 12.469046592712402, "learning_rate": 3.6236034482758627e-06, "loss": 0.1924, "step": 89875 }, { "epoch": 0.88, "grad_norm": 27.460222244262695, "learning_rate": 3.6231724137931033e-06, "loss": 0.3098, "step": 89900 }, { "epoch": 0.88, "grad_norm": 10.161355018615723, "learning_rate": 3.622741379310345e-06, "loss": 0.2067, "step": 89925 }, { "epoch": 0.88, "grad_norm": 21.27857780456543, "learning_rate": 3.6223103448275866e-06, "loss": 0.3848, "step": 89950 }, { "epoch": 0.88, "grad_norm": 1.9255393743515015, "learning_rate": 3.621879310344828e-06, "loss": 0.2049, "step": 89975 }, { "epoch": 0.89, "grad_norm": 17.983484268188477, "learning_rate": 3.6214482758620695e-06, "loss": 0.4013, "step": 90000 }, { "epoch": 0.89, "grad_norm": 4.878505706787109, "learning_rate": 3.6210172413793106e-06, "loss": 0.2451, "step": 90025 }, { "epoch": 0.89, "grad_norm": 21.83017349243164, "learning_rate": 3.620586206896552e-06, "loss": 0.4531, "step": 90050 }, { "epoch": 0.89, "grad_norm": 10.774433135986328, "learning_rate": 3.6201551724137935e-06, "loss": 0.1911, "step": 90075 }, { "epoch": 0.89, "grad_norm": 10.753467559814453, "learning_rate": 3.619724137931035e-06, "loss": 0.335, "step": 90100 }, { "epoch": 0.89, "grad_norm": 7.728938102722168, "learning_rate": 3.619293103448276e-06, "loss": 0.1682, "step": 90125 }, { "epoch": 0.89, "grad_norm": 11.42482852935791, "learning_rate": 3.6188620689655174e-06, "loss": 0.3422, "step": 90150 }, { "epoch": 0.89, "grad_norm": 9.664454460144043, "learning_rate": 3.618431034482759e-06, "loss": 0.2081, "step": 90175 }, { "epoch": 0.89, "grad_norm": 11.366732597351074, "learning_rate": 3.6180000000000003e-06, "loss": 0.3691, "step": 90200 }, { "epoch": 0.89, "grad_norm": 7.875049114227295, "learning_rate": 3.617568965517242e-06, "loss": 0.2261, "step": 90225 }, { "epoch": 0.89, "grad_norm": 15.383584976196289, "learning_rate": 3.617137931034483e-06, "loss": 0.3565, "step": 90250 }, { "epoch": 0.89, "grad_norm": 9.724736213684082, "learning_rate": 3.6167068965517243e-06, "loss": 0.2238, "step": 90275 }, { "epoch": 0.89, "grad_norm": 12.18232536315918, "learning_rate": 3.6162758620689657e-06, "loss": 0.3896, "step": 90300 }, { "epoch": 0.89, "grad_norm": 4.267768859863281, "learning_rate": 3.615844827586207e-06, "loss": 0.2208, "step": 90325 }, { "epoch": 0.89, "grad_norm": 14.118539810180664, "learning_rate": 3.6154137931034482e-06, "loss": 0.3139, "step": 90350 }, { "epoch": 0.89, "grad_norm": 12.019610404968262, "learning_rate": 3.6149827586206897e-06, "loss": 0.1989, "step": 90375 }, { "epoch": 0.89, "grad_norm": 20.468849182128906, "learning_rate": 3.614551724137931e-06, "loss": 0.4818, "step": 90400 }, { "epoch": 0.89, "grad_norm": 16.343305587768555, "learning_rate": 3.614120689655173e-06, "loss": 0.1957, "step": 90425 }, { "epoch": 0.89, "grad_norm": 16.77923011779785, "learning_rate": 3.6136896551724145e-06, "loss": 0.4352, "step": 90450 }, { "epoch": 0.89, "grad_norm": 16.59072494506836, "learning_rate": 3.613258620689655e-06, "loss": 0.1827, "step": 90475 }, { "epoch": 0.89, "grad_norm": 15.806279182434082, "learning_rate": 3.612844827586207e-06, "loss": 0.3607, "step": 90500 }, { "epoch": 0.89, "grad_norm": 14.834769248962402, "learning_rate": 3.6124137931034484e-06, "loss": 0.2053, "step": 90525 }, { "epoch": 0.89, "grad_norm": 23.49019432067871, "learning_rate": 3.6119827586206903e-06, "loss": 0.3401, "step": 90550 }, { "epoch": 0.89, "grad_norm": 7.827987194061279, "learning_rate": 3.611551724137931e-06, "loss": 0.2007, "step": 90575 }, { "epoch": 0.89, "grad_norm": 13.130210876464844, "learning_rate": 3.611120689655173e-06, "loss": 0.3897, "step": 90600 }, { "epoch": 0.89, "grad_norm": 6.743232250213623, "learning_rate": 3.6106896551724143e-06, "loss": 0.2079, "step": 90625 }, { "epoch": 0.89, "grad_norm": 21.450660705566406, "learning_rate": 3.6102586206896557e-06, "loss": 0.3559, "step": 90650 }, { "epoch": 0.89, "grad_norm": 7.891610622406006, "learning_rate": 3.609827586206897e-06, "loss": 0.1855, "step": 90675 }, { "epoch": 0.89, "grad_norm": 19.687265396118164, "learning_rate": 3.609396551724138e-06, "loss": 0.3882, "step": 90700 }, { "epoch": 0.89, "grad_norm": 10.141528129577637, "learning_rate": 3.6089655172413797e-06, "loss": 0.2299, "step": 90725 }, { "epoch": 0.89, "grad_norm": 21.346908569335938, "learning_rate": 3.608534482758621e-06, "loss": 0.4054, "step": 90750 }, { "epoch": 0.89, "grad_norm": 6.433400630950928, "learning_rate": 3.6081034482758626e-06, "loss": 0.1964, "step": 90775 }, { "epoch": 0.89, "grad_norm": 15.283792495727539, "learning_rate": 3.6076724137931036e-06, "loss": 0.3231, "step": 90800 }, { "epoch": 0.89, "grad_norm": 11.206706047058105, "learning_rate": 3.607241379310345e-06, "loss": 0.1902, "step": 90825 }, { "epoch": 0.89, "grad_norm": 6.068081378936768, "learning_rate": 3.6068103448275865e-06, "loss": 0.3816, "step": 90850 }, { "epoch": 0.89, "grad_norm": 4.786280632019043, "learning_rate": 3.606379310344828e-06, "loss": 0.1919, "step": 90875 }, { "epoch": 0.89, "grad_norm": 14.631484985351562, "learning_rate": 3.6059482758620694e-06, "loss": 0.4047, "step": 90900 }, { "epoch": 0.89, "grad_norm": 5.092428207397461, "learning_rate": 3.6055172413793105e-06, "loss": 0.1922, "step": 90925 }, { "epoch": 0.89, "grad_norm": 13.224761962890625, "learning_rate": 3.605086206896552e-06, "loss": 0.3298, "step": 90950 }, { "epoch": 0.89, "grad_norm": 8.997218132019043, "learning_rate": 3.6046551724137934e-06, "loss": 0.1939, "step": 90975 }, { "epoch": 0.9, "grad_norm": 14.922693252563477, "learning_rate": 3.604224137931035e-06, "loss": 0.3859, "step": 91000 }, { "epoch": 0.9, "grad_norm": 7.215751647949219, "learning_rate": 3.603793103448276e-06, "loss": 0.2011, "step": 91025 }, { "epoch": 0.9, "grad_norm": 13.754292488098145, "learning_rate": 3.6033620689655173e-06, "loss": 0.402, "step": 91050 }, { "epoch": 0.9, "grad_norm": 6.518117904663086, "learning_rate": 3.6029310344827588e-06, "loss": 0.2064, "step": 91075 }, { "epoch": 0.9, "grad_norm": 17.727203369140625, "learning_rate": 3.6025000000000002e-06, "loss": 0.4053, "step": 91100 }, { "epoch": 0.9, "grad_norm": 6.519466876983643, "learning_rate": 3.602068965517242e-06, "loss": 0.1833, "step": 91125 }, { "epoch": 0.9, "grad_norm": 9.744406700134277, "learning_rate": 3.6016379310344827e-06, "loss": 0.392, "step": 91150 }, { "epoch": 0.9, "grad_norm": 1.3899580240249634, "learning_rate": 3.6012068965517246e-06, "loss": 0.1796, "step": 91175 }, { "epoch": 0.9, "grad_norm": 20.744186401367188, "learning_rate": 3.600775862068966e-06, "loss": 0.3547, "step": 91200 }, { "epoch": 0.9, "grad_norm": 8.639158248901367, "learning_rate": 3.6003448275862075e-06, "loss": 0.1831, "step": 91225 }, { "epoch": 0.9, "grad_norm": 19.311792373657227, "learning_rate": 3.5999137931034486e-06, "loss": 0.3898, "step": 91250 }, { "epoch": 0.9, "grad_norm": 5.7082672119140625, "learning_rate": 3.59948275862069e-06, "loss": 0.2036, "step": 91275 }, { "epoch": 0.9, "grad_norm": 22.11042594909668, "learning_rate": 3.5990517241379315e-06, "loss": 0.4013, "step": 91300 }, { "epoch": 0.9, "grad_norm": 9.52428150177002, "learning_rate": 3.598620689655173e-06, "loss": 0.2153, "step": 91325 }, { "epoch": 0.9, "grad_norm": 16.242063522338867, "learning_rate": 3.5981896551724144e-06, "loss": 0.3963, "step": 91350 }, { "epoch": 0.9, "grad_norm": 11.08181381225586, "learning_rate": 3.5977586206896554e-06, "loss": 0.2065, "step": 91375 }, { "epoch": 0.9, "grad_norm": 18.792789459228516, "learning_rate": 3.597327586206897e-06, "loss": 0.401, "step": 91400 }, { "epoch": 0.9, "grad_norm": 8.69222640991211, "learning_rate": 3.5968965517241383e-06, "loss": 0.2176, "step": 91425 }, { "epoch": 0.9, "grad_norm": 12.123641967773438, "learning_rate": 3.5964655172413798e-06, "loss": 0.3111, "step": 91450 }, { "epoch": 0.9, "grad_norm": 10.656984329223633, "learning_rate": 3.596034482758621e-06, "loss": 0.2132, "step": 91475 }, { "epoch": 0.9, "grad_norm": 15.871405601501465, "learning_rate": 3.5956034482758623e-06, "loss": 0.4032, "step": 91500 }, { "epoch": 0.9, "grad_norm": 17.723644256591797, "learning_rate": 3.5951724137931037e-06, "loss": 0.2188, "step": 91525 }, { "epoch": 0.9, "grad_norm": 19.250595092773438, "learning_rate": 3.594741379310345e-06, "loss": 0.3764, "step": 91550 }, { "epoch": 0.9, "grad_norm": 9.263467788696289, "learning_rate": 3.5943103448275862e-06, "loss": 0.1918, "step": 91575 }, { "epoch": 0.9, "grad_norm": 19.65157127380371, "learning_rate": 3.5938793103448277e-06, "loss": 0.4139, "step": 91600 }, { "epoch": 0.9, "grad_norm": 8.216362953186035, "learning_rate": 3.593448275862069e-06, "loss": 0.1983, "step": 91625 }, { "epoch": 0.9, "grad_norm": 19.59191131591797, "learning_rate": 3.5930172413793106e-06, "loss": 0.3328, "step": 91650 }, { "epoch": 0.9, "grad_norm": 4.588002681732178, "learning_rate": 3.592586206896552e-06, "loss": 0.1769, "step": 91675 }, { "epoch": 0.9, "grad_norm": 13.121015548706055, "learning_rate": 3.592155172413793e-06, "loss": 0.36, "step": 91700 }, { "epoch": 0.9, "grad_norm": 5.777691841125488, "learning_rate": 3.5917241379310345e-06, "loss": 0.1929, "step": 91725 }, { "epoch": 0.9, "grad_norm": 19.234352111816406, "learning_rate": 3.591293103448276e-06, "loss": 0.3763, "step": 91750 }, { "epoch": 0.9, "grad_norm": 8.621420860290527, "learning_rate": 3.590862068965518e-06, "loss": 0.2152, "step": 91775 }, { "epoch": 0.9, "grad_norm": 13.964277267456055, "learning_rate": 3.5904310344827585e-06, "loss": 0.3474, "step": 91800 }, { "epoch": 0.9, "grad_norm": 12.512743949890137, "learning_rate": 3.5900000000000004e-06, "loss": 0.2259, "step": 91825 }, { "epoch": 0.9, "grad_norm": 20.368206024169922, "learning_rate": 3.589568965517242e-06, "loss": 0.4047, "step": 91850 }, { "epoch": 0.9, "grad_norm": 13.063158988952637, "learning_rate": 3.5891379310344833e-06, "loss": 0.1946, "step": 91875 }, { "epoch": 0.9, "grad_norm": 13.674870491027832, "learning_rate": 3.5887068965517247e-06, "loss": 0.3644, "step": 91900 }, { "epoch": 0.9, "grad_norm": 9.413012504577637, "learning_rate": 3.5882758620689658e-06, "loss": 0.2257, "step": 91925 }, { "epoch": 0.9, "grad_norm": 14.065956115722656, "learning_rate": 3.5878448275862072e-06, "loss": 0.4009, "step": 91950 }, { "epoch": 0.9, "grad_norm": 9.856842041015625, "learning_rate": 3.5874137931034487e-06, "loss": 0.1639, "step": 91975 }, { "epoch": 0.9, "grad_norm": 15.13332748413086, "learning_rate": 3.58698275862069e-06, "loss": 0.3999, "step": 92000 }, { "epoch": 0.91, "grad_norm": 8.437738418579102, "learning_rate": 3.586551724137931e-06, "loss": 0.2029, "step": 92025 }, { "epoch": 0.91, "grad_norm": 15.725403785705566, "learning_rate": 3.5861206896551726e-06, "loss": 0.3964, "step": 92050 }, { "epoch": 0.91, "grad_norm": 9.776856422424316, "learning_rate": 3.585689655172414e-06, "loss": 0.2142, "step": 92075 }, { "epoch": 0.91, "grad_norm": 17.168609619140625, "learning_rate": 3.5852586206896555e-06, "loss": 0.3675, "step": 92100 }, { "epoch": 0.91, "grad_norm": 10.965652465820312, "learning_rate": 3.584827586206897e-06, "loss": 0.226, "step": 92125 }, { "epoch": 0.91, "grad_norm": 18.476051330566406, "learning_rate": 3.584396551724138e-06, "loss": 0.3628, "step": 92150 }, { "epoch": 0.91, "grad_norm": 9.253654479980469, "learning_rate": 3.5839655172413795e-06, "loss": 0.2369, "step": 92175 }, { "epoch": 0.91, "grad_norm": 18.034019470214844, "learning_rate": 3.583534482758621e-06, "loss": 0.3595, "step": 92200 }, { "epoch": 0.91, "grad_norm": 11.085156440734863, "learning_rate": 3.5831034482758624e-06, "loss": 0.2018, "step": 92225 }, { "epoch": 0.91, "grad_norm": 20.72171401977539, "learning_rate": 3.5826724137931034e-06, "loss": 0.418, "step": 92250 }, { "epoch": 0.91, "grad_norm": 10.96428394317627, "learning_rate": 3.582241379310345e-06, "loss": 0.1881, "step": 92275 }, { "epoch": 0.91, "grad_norm": 17.157169342041016, "learning_rate": 3.5818103448275863e-06, "loss": 0.4019, "step": 92300 }, { "epoch": 0.91, "grad_norm": 8.457035064697266, "learning_rate": 3.581379310344828e-06, "loss": 0.2251, "step": 92325 }, { "epoch": 0.91, "grad_norm": 10.846384048461914, "learning_rate": 3.5809482758620697e-06, "loss": 0.3786, "step": 92350 }, { "epoch": 0.91, "grad_norm": 9.9097261428833, "learning_rate": 3.5805172413793103e-06, "loss": 0.2056, "step": 92375 }, { "epoch": 0.91, "grad_norm": 16.761869430541992, "learning_rate": 3.580086206896552e-06, "loss": 0.351, "step": 92400 }, { "epoch": 0.91, "grad_norm": 7.192762851715088, "learning_rate": 3.5796551724137936e-06, "loss": 0.2481, "step": 92425 }, { "epoch": 0.91, "grad_norm": 14.659830093383789, "learning_rate": 3.579224137931035e-06, "loss": 0.3727, "step": 92450 }, { "epoch": 0.91, "grad_norm": 10.170915603637695, "learning_rate": 3.578793103448276e-06, "loss": 0.2128, "step": 92475 }, { "epoch": 0.91, "grad_norm": 21.734140396118164, "learning_rate": 3.5783620689655176e-06, "loss": 0.4783, "step": 92500 }, { "epoch": 0.91, "grad_norm": 11.886795997619629, "learning_rate": 3.577931034482759e-06, "loss": 0.1661, "step": 92525 }, { "epoch": 0.91, "grad_norm": 25.745506286621094, "learning_rate": 3.5775344827586213e-06, "loss": 0.3713, "step": 92550 }, { "epoch": 0.91, "grad_norm": 5.635123252868652, "learning_rate": 3.577103448275862e-06, "loss": 0.1971, "step": 92575 }, { "epoch": 0.91, "grad_norm": 16.564300537109375, "learning_rate": 3.5766724137931034e-06, "loss": 0.3859, "step": 92600 }, { "epoch": 0.91, "grad_norm": 11.650574684143066, "learning_rate": 3.5762413793103453e-06, "loss": 0.212, "step": 92625 }, { "epoch": 0.91, "grad_norm": 23.59750747680664, "learning_rate": 3.5758103448275867e-06, "loss": 0.3706, "step": 92650 }, { "epoch": 0.91, "grad_norm": 10.361859321594238, "learning_rate": 3.575379310344828e-06, "loss": 0.2507, "step": 92675 }, { "epoch": 0.91, "grad_norm": 18.92694091796875, "learning_rate": 3.5749482758620692e-06, "loss": 0.3152, "step": 92700 }, { "epoch": 0.91, "grad_norm": 9.923369407653809, "learning_rate": 3.5745172413793107e-06, "loss": 0.1859, "step": 92725 }, { "epoch": 0.91, "grad_norm": 10.702369689941406, "learning_rate": 3.574086206896552e-06, "loss": 0.3226, "step": 92750 }, { "epoch": 0.91, "grad_norm": 10.292001724243164, "learning_rate": 3.5736551724137936e-06, "loss": 0.2031, "step": 92775 }, { "epoch": 0.91, "grad_norm": 10.244429588317871, "learning_rate": 3.5732241379310346e-06, "loss": 0.3535, "step": 92800 }, { "epoch": 0.91, "grad_norm": 7.893490314483643, "learning_rate": 3.572793103448276e-06, "loss": 0.2196, "step": 92825 }, { "epoch": 0.91, "grad_norm": 22.33490753173828, "learning_rate": 3.5723620689655175e-06, "loss": 0.3509, "step": 92850 }, { "epoch": 0.91, "grad_norm": 14.335936546325684, "learning_rate": 3.571931034482759e-06, "loss": 0.2026, "step": 92875 }, { "epoch": 0.91, "grad_norm": 17.26981544494629, "learning_rate": 3.5715000000000004e-06, "loss": 0.3584, "step": 92900 }, { "epoch": 0.91, "grad_norm": 7.413197994232178, "learning_rate": 3.5710689655172415e-06, "loss": 0.2083, "step": 92925 }, { "epoch": 0.91, "grad_norm": 16.02202606201172, "learning_rate": 3.570637931034483e-06, "loss": 0.3803, "step": 92950 }, { "epoch": 0.91, "grad_norm": 5.21943473815918, "learning_rate": 3.5702068965517244e-06, "loss": 0.1973, "step": 92975 }, { "epoch": 0.91, "grad_norm": 17.332082748413086, "learning_rate": 3.569775862068966e-06, "loss": 0.37, "step": 93000 }, { "epoch": 0.91, "grad_norm": 9.112167358398438, "learning_rate": 3.569344827586207e-06, "loss": 0.2132, "step": 93025 }, { "epoch": 0.92, "grad_norm": 19.414087295532227, "learning_rate": 3.5689137931034483e-06, "loss": 0.4213, "step": 93050 }, { "epoch": 0.92, "grad_norm": 17.196165084838867, "learning_rate": 3.56848275862069e-06, "loss": 0.1988, "step": 93075 }, { "epoch": 0.92, "grad_norm": 16.560510635375977, "learning_rate": 3.5680517241379313e-06, "loss": 0.3829, "step": 93100 }, { "epoch": 0.92, "grad_norm": 10.113322257995605, "learning_rate": 3.567620689655173e-06, "loss": 0.2185, "step": 93125 }, { "epoch": 0.92, "grad_norm": 18.06502914428711, "learning_rate": 3.5671896551724137e-06, "loss": 0.3706, "step": 93150 }, { "epoch": 0.92, "grad_norm": 8.365435600280762, "learning_rate": 3.566758620689655e-06, "loss": 0.2614, "step": 93175 }, { "epoch": 0.92, "grad_norm": 14.093559265136719, "learning_rate": 3.566327586206897e-06, "loss": 0.3246, "step": 93200 }, { "epoch": 0.92, "grad_norm": 11.586827278137207, "learning_rate": 3.5658965517241385e-06, "loss": 0.2694, "step": 93225 }, { "epoch": 0.92, "grad_norm": 15.858085632324219, "learning_rate": 3.565465517241379e-06, "loss": 0.4013, "step": 93250 }, { "epoch": 0.92, "grad_norm": 9.180702209472656, "learning_rate": 3.565034482758621e-06, "loss": 0.1834, "step": 93275 }, { "epoch": 0.92, "grad_norm": 23.049833297729492, "learning_rate": 3.5646034482758625e-06, "loss": 0.368, "step": 93300 }, { "epoch": 0.92, "grad_norm": 13.137402534484863, "learning_rate": 3.564172413793104e-06, "loss": 0.1682, "step": 93325 }, { "epoch": 0.92, "grad_norm": 16.996755599975586, "learning_rate": 3.5637413793103454e-06, "loss": 0.3301, "step": 93350 }, { "epoch": 0.92, "grad_norm": 5.096767425537109, "learning_rate": 3.5633103448275864e-06, "loss": 0.1852, "step": 93375 }, { "epoch": 0.92, "grad_norm": 14.502662658691406, "learning_rate": 3.562879310344828e-06, "loss": 0.3984, "step": 93400 }, { "epoch": 0.92, "grad_norm": 14.69672679901123, "learning_rate": 3.5624482758620693e-06, "loss": 0.2833, "step": 93425 }, { "epoch": 0.92, "grad_norm": 14.324502944946289, "learning_rate": 3.562017241379311e-06, "loss": 0.353, "step": 93450 }, { "epoch": 0.92, "grad_norm": 14.24505615234375, "learning_rate": 3.561586206896552e-06, "loss": 0.225, "step": 93475 }, { "epoch": 0.92, "grad_norm": 20.62904167175293, "learning_rate": 3.5611551724137933e-06, "loss": 0.4018, "step": 93500 }, { "epoch": 0.92, "grad_norm": 10.165318489074707, "learning_rate": 3.5607586206896556e-06, "loss": 0.1951, "step": 93525 }, { "epoch": 0.92, "grad_norm": 12.637361526489258, "learning_rate": 3.560327586206897e-06, "loss": 0.3421, "step": 93550 }, { "epoch": 0.92, "grad_norm": 14.649415969848633, "learning_rate": 3.559896551724138e-06, "loss": 0.2277, "step": 93575 }, { "epoch": 0.92, "grad_norm": 20.234453201293945, "learning_rate": 3.5594655172413795e-06, "loss": 0.3205, "step": 93600 }, { "epoch": 0.92, "grad_norm": 14.742210388183594, "learning_rate": 3.559034482758621e-06, "loss": 0.2578, "step": 93625 }, { "epoch": 0.92, "grad_norm": 19.346885681152344, "learning_rate": 3.5586034482758625e-06, "loss": 0.3405, "step": 93650 }, { "epoch": 0.92, "grad_norm": 9.709547996520996, "learning_rate": 3.558172413793104e-06, "loss": 0.2327, "step": 93675 }, { "epoch": 0.92, "grad_norm": 22.107126235961914, "learning_rate": 3.557741379310345e-06, "loss": 0.4125, "step": 93700 }, { "epoch": 0.92, "grad_norm": 1.8837248086929321, "learning_rate": 3.5573103448275864e-06, "loss": 0.2013, "step": 93725 }, { "epoch": 0.92, "grad_norm": 18.85214614868164, "learning_rate": 3.556879310344828e-06, "loss": 0.3837, "step": 93750 }, { "epoch": 0.92, "grad_norm": 6.926147937774658, "learning_rate": 3.5564482758620693e-06, "loss": 0.1917, "step": 93775 }, { "epoch": 0.92, "grad_norm": 15.48702335357666, "learning_rate": 3.5560172413793103e-06, "loss": 0.3693, "step": 93800 }, { "epoch": 0.92, "grad_norm": 8.887727737426758, "learning_rate": 3.555586206896552e-06, "loss": 0.1657, "step": 93825 }, { "epoch": 0.92, "grad_norm": 9.545767784118652, "learning_rate": 3.5551551724137933e-06, "loss": 0.3372, "step": 93850 }, { "epoch": 0.92, "grad_norm": 9.002913475036621, "learning_rate": 3.5547241379310347e-06, "loss": 0.2098, "step": 93875 }, { "epoch": 0.92, "grad_norm": 16.178661346435547, "learning_rate": 3.554293103448276e-06, "loss": 0.3689, "step": 93900 }, { "epoch": 0.92, "grad_norm": 11.569357872009277, "learning_rate": 3.553862068965517e-06, "loss": 0.1799, "step": 93925 }, { "epoch": 0.92, "grad_norm": 13.576531410217285, "learning_rate": 3.5534310344827587e-06, "loss": 0.3897, "step": 93950 }, { "epoch": 0.92, "grad_norm": 7.718629837036133, "learning_rate": 3.5530000000000005e-06, "loss": 0.2451, "step": 93975 }, { "epoch": 0.92, "grad_norm": 12.937939643859863, "learning_rate": 3.552568965517242e-06, "loss": 0.3511, "step": 94000 }, { "epoch": 0.92, "grad_norm": 5.770434379577637, "learning_rate": 3.5521379310344826e-06, "loss": 0.1816, "step": 94025 }, { "epoch": 0.93, "grad_norm": 21.69088363647461, "learning_rate": 3.5517068965517245e-06, "loss": 0.3969, "step": 94050 }, { "epoch": 0.93, "grad_norm": 10.500082015991211, "learning_rate": 3.551275862068966e-06, "loss": 0.2846, "step": 94075 }, { "epoch": 0.93, "grad_norm": 11.300451278686523, "learning_rate": 3.5508448275862074e-06, "loss": 0.3247, "step": 94100 }, { "epoch": 0.93, "grad_norm": 6.820154666900635, "learning_rate": 3.550413793103449e-06, "loss": 0.1785, "step": 94125 }, { "epoch": 0.93, "grad_norm": 20.104890823364258, "learning_rate": 3.54998275862069e-06, "loss": 0.3687, "step": 94150 }, { "epoch": 0.93, "grad_norm": 11.498945236206055, "learning_rate": 3.5495517241379313e-06, "loss": 0.1908, "step": 94175 }, { "epoch": 0.93, "grad_norm": 16.98555564880371, "learning_rate": 3.549120689655173e-06, "loss": 0.3899, "step": 94200 }, { "epoch": 0.93, "grad_norm": 5.333455562591553, "learning_rate": 3.5486896551724143e-06, "loss": 0.1807, "step": 94225 }, { "epoch": 0.93, "grad_norm": 16.416500091552734, "learning_rate": 3.5482586206896553e-06, "loss": 0.3317, "step": 94250 }, { "epoch": 0.93, "grad_norm": 13.513339042663574, "learning_rate": 3.5478275862068967e-06, "loss": 0.2129, "step": 94275 }, { "epoch": 0.93, "grad_norm": 15.588797569274902, "learning_rate": 3.547396551724138e-06, "loss": 0.4296, "step": 94300 }, { "epoch": 0.93, "grad_norm": 10.648852348327637, "learning_rate": 3.5469655172413797e-06, "loss": 0.2441, "step": 94325 }, { "epoch": 0.93, "grad_norm": 20.104188919067383, "learning_rate": 3.546534482758621e-06, "loss": 0.4544, "step": 94350 }, { "epoch": 0.93, "grad_norm": 14.509130477905273, "learning_rate": 3.546103448275862e-06, "loss": 0.2081, "step": 94375 }, { "epoch": 0.93, "grad_norm": 18.67010498046875, "learning_rate": 3.5456724137931036e-06, "loss": 0.4472, "step": 94400 }, { "epoch": 0.93, "grad_norm": 0.9539594650268555, "learning_rate": 3.545241379310345e-06, "loss": 0.1901, "step": 94425 }, { "epoch": 0.93, "grad_norm": 24.49835205078125, "learning_rate": 3.5448103448275865e-06, "loss": 0.3859, "step": 94450 }, { "epoch": 0.93, "grad_norm": 9.704756736755371, "learning_rate": 3.5443793103448276e-06, "loss": 0.2318, "step": 94475 }, { "epoch": 0.93, "grad_norm": 15.457919120788574, "learning_rate": 3.543948275862069e-06, "loss": 0.3551, "step": 94500 }, { "epoch": 0.93, "grad_norm": 11.859614372253418, "learning_rate": 3.5435172413793105e-06, "loss": 0.2201, "step": 94525 }, { "epoch": 0.93, "grad_norm": 12.384027481079102, "learning_rate": 3.5430862068965523e-06, "loss": 0.3385, "step": 94550 }, { "epoch": 0.93, "grad_norm": 3.5260398387908936, "learning_rate": 3.542655172413794e-06, "loss": 0.1672, "step": 94575 }, { "epoch": 0.93, "grad_norm": 18.0322322845459, "learning_rate": 3.5422241379310344e-06, "loss": 0.3298, "step": 94600 }, { "epoch": 0.93, "grad_norm": 5.00106954574585, "learning_rate": 3.5417931034482763e-06, "loss": 0.1962, "step": 94625 }, { "epoch": 0.93, "grad_norm": 18.93651580810547, "learning_rate": 3.5413620689655177e-06, "loss": 0.3242, "step": 94650 }, { "epoch": 0.93, "grad_norm": 5.95339298248291, "learning_rate": 3.540931034482759e-06, "loss": 0.2074, "step": 94675 }, { "epoch": 0.93, "grad_norm": 8.085055351257324, "learning_rate": 3.5405000000000002e-06, "loss": 0.3485, "step": 94700 }, { "epoch": 0.93, "grad_norm": 7.410332679748535, "learning_rate": 3.5400689655172417e-06, "loss": 0.1961, "step": 94725 }, { "epoch": 0.93, "grad_norm": 20.24571418762207, "learning_rate": 3.539637931034483e-06, "loss": 0.4074, "step": 94750 }, { "epoch": 0.93, "grad_norm": 10.349404335021973, "learning_rate": 3.5392068965517246e-06, "loss": 0.2354, "step": 94775 }, { "epoch": 0.93, "grad_norm": 8.552905082702637, "learning_rate": 3.538775862068966e-06, "loss": 0.2985, "step": 94800 }, { "epoch": 0.93, "grad_norm": 5.519542217254639, "learning_rate": 3.538344827586207e-06, "loss": 0.2172, "step": 94825 }, { "epoch": 0.93, "grad_norm": 16.911151885986328, "learning_rate": 3.5379137931034486e-06, "loss": 0.3821, "step": 94850 }, { "epoch": 0.93, "grad_norm": 6.1133503913879395, "learning_rate": 3.53748275862069e-06, "loss": 0.1737, "step": 94875 }, { "epoch": 0.93, "grad_norm": 26.32500648498535, "learning_rate": 3.5370517241379315e-06, "loss": 0.3636, "step": 94900 }, { "epoch": 0.93, "grad_norm": 9.732344627380371, "learning_rate": 3.5366206896551725e-06, "loss": 0.1887, "step": 94925 }, { "epoch": 0.93, "grad_norm": 24.802940368652344, "learning_rate": 3.536189655172414e-06, "loss": 0.4018, "step": 94950 }, { "epoch": 0.93, "grad_norm": 11.663778305053711, "learning_rate": 3.5357586206896554e-06, "loss": 0.1859, "step": 94975 }, { "epoch": 0.93, "grad_norm": 23.178735733032227, "learning_rate": 3.535327586206897e-06, "loss": 0.3819, "step": 95000 }, { "epoch": 0.93, "grad_norm": 8.684389114379883, "learning_rate": 3.5348965517241383e-06, "loss": 0.1811, "step": 95025 }, { "epoch": 0.93, "grad_norm": 12.862778663635254, "learning_rate": 3.5344655172413794e-06, "loss": 0.4015, "step": 95050 }, { "epoch": 0.94, "grad_norm": 7.610566139221191, "learning_rate": 3.534034482758621e-06, "loss": 0.1771, "step": 95075 }, { "epoch": 0.94, "grad_norm": 22.62749481201172, "learning_rate": 3.5336034482758623e-06, "loss": 0.3939, "step": 95100 }, { "epoch": 0.94, "grad_norm": 14.767937660217285, "learning_rate": 3.533172413793104e-06, "loss": 0.2026, "step": 95125 }, { "epoch": 0.94, "grad_norm": 21.508066177368164, "learning_rate": 3.5327413793103448e-06, "loss": 0.3734, "step": 95150 }, { "epoch": 0.94, "grad_norm": 9.691933631896973, "learning_rate": 3.5323103448275862e-06, "loss": 0.2049, "step": 95175 }, { "epoch": 0.94, "grad_norm": 13.633696556091309, "learning_rate": 3.531879310344828e-06, "loss": 0.3821, "step": 95200 }, { "epoch": 0.94, "grad_norm": 8.816186904907227, "learning_rate": 3.5314482758620696e-06, "loss": 0.2298, "step": 95225 }, { "epoch": 0.94, "grad_norm": 18.62116241455078, "learning_rate": 3.531017241379311e-06, "loss": 0.3559, "step": 95250 }, { "epoch": 0.94, "grad_norm": 6.549502849578857, "learning_rate": 3.530586206896552e-06, "loss": 0.2044, "step": 95275 }, { "epoch": 0.94, "grad_norm": 18.45738983154297, "learning_rate": 3.5301551724137935e-06, "loss": 0.4042, "step": 95300 }, { "epoch": 0.94, "grad_norm": 11.841790199279785, "learning_rate": 3.529724137931035e-06, "loss": 0.2287, "step": 95325 }, { "epoch": 0.94, "grad_norm": 15.99263858795166, "learning_rate": 3.5292931034482764e-06, "loss": 0.4018, "step": 95350 }, { "epoch": 0.94, "grad_norm": 6.706634044647217, "learning_rate": 3.5288620689655174e-06, "loss": 0.2443, "step": 95375 }, { "epoch": 0.94, "grad_norm": 17.272216796875, "learning_rate": 3.528431034482759e-06, "loss": 0.3966, "step": 95400 }, { "epoch": 0.94, "grad_norm": 20.092683792114258, "learning_rate": 3.5280000000000004e-06, "loss": 0.2035, "step": 95425 }, { "epoch": 0.94, "grad_norm": 14.655130386352539, "learning_rate": 3.527568965517242e-06, "loss": 0.3041, "step": 95450 }, { "epoch": 0.94, "grad_norm": 7.35845422744751, "learning_rate": 3.5271379310344833e-06, "loss": 0.2276, "step": 95475 }, { "epoch": 0.94, "grad_norm": 10.149626731872559, "learning_rate": 3.5267068965517243e-06, "loss": 0.3607, "step": 95500 }, { "epoch": 0.94, "grad_norm": 6.743387222290039, "learning_rate": 3.5262758620689658e-06, "loss": 0.22, "step": 95525 }, { "epoch": 0.94, "grad_norm": 21.330026626586914, "learning_rate": 3.5258448275862072e-06, "loss": 0.3731, "step": 95550 }, { "epoch": 0.94, "grad_norm": 13.573677062988281, "learning_rate": 3.5254137931034487e-06, "loss": 0.2192, "step": 95575 }, { "epoch": 0.94, "grad_norm": 23.268238067626953, "learning_rate": 3.5249827586206897e-06, "loss": 0.35, "step": 95600 }, { "epoch": 0.94, "grad_norm": 8.444611549377441, "learning_rate": 3.524551724137931e-06, "loss": 0.2086, "step": 95625 }, { "epoch": 0.94, "grad_norm": 24.30377769470215, "learning_rate": 3.5241206896551726e-06, "loss": 0.4188, "step": 95650 }, { "epoch": 0.94, "grad_norm": 12.238382339477539, "learning_rate": 3.523689655172414e-06, "loss": 0.1774, "step": 95675 }, { "epoch": 0.94, "grad_norm": 16.797340393066406, "learning_rate": 3.523258620689655e-06, "loss": 0.3617, "step": 95700 }, { "epoch": 0.94, "grad_norm": 4.61725378036499, "learning_rate": 3.5228275862068966e-06, "loss": 0.2155, "step": 95725 }, { "epoch": 0.94, "grad_norm": 16.52860450744629, "learning_rate": 3.522396551724138e-06, "loss": 0.3239, "step": 95750 }, { "epoch": 0.94, "grad_norm": 9.353958129882812, "learning_rate": 3.52196551724138e-06, "loss": 0.2113, "step": 95775 }, { "epoch": 0.94, "grad_norm": 19.59450912475586, "learning_rate": 3.5215344827586214e-06, "loss": 0.4236, "step": 95800 }, { "epoch": 0.94, "grad_norm": 11.471752166748047, "learning_rate": 3.521103448275862e-06, "loss": 0.2126, "step": 95825 }, { "epoch": 0.94, "grad_norm": 23.833585739135742, "learning_rate": 3.520672413793104e-06, "loss": 0.4145, "step": 95850 }, { "epoch": 0.94, "grad_norm": 7.857391357421875, "learning_rate": 3.5202413793103453e-06, "loss": 0.1883, "step": 95875 }, { "epoch": 0.94, "grad_norm": 19.491390228271484, "learning_rate": 3.5198103448275868e-06, "loss": 0.3679, "step": 95900 }, { "epoch": 0.94, "grad_norm": 10.756388664245605, "learning_rate": 3.519379310344828e-06, "loss": 0.2433, "step": 95925 }, { "epoch": 0.94, "grad_norm": 14.546956062316895, "learning_rate": 3.5189482758620692e-06, "loss": 0.3816, "step": 95950 }, { "epoch": 0.94, "grad_norm": 8.395797729492188, "learning_rate": 3.5185172413793107e-06, "loss": 0.2056, "step": 95975 }, { "epoch": 0.94, "grad_norm": 19.743730545043945, "learning_rate": 3.518086206896552e-06, "loss": 0.3852, "step": 96000 }, { "epoch": 0.94, "grad_norm": 8.895967483520508, "learning_rate": 3.5176551724137936e-06, "loss": 0.2238, "step": 96025 }, { "epoch": 0.94, "grad_norm": 11.84493350982666, "learning_rate": 3.5172241379310347e-06, "loss": 0.3487, "step": 96050 }, { "epoch": 0.94, "grad_norm": 8.606966018676758, "learning_rate": 3.516793103448276e-06, "loss": 0.2081, "step": 96075 }, { "epoch": 0.95, "grad_norm": 15.993688583374023, "learning_rate": 3.5163620689655176e-06, "loss": 0.4039, "step": 96100 }, { "epoch": 0.95, "grad_norm": 9.302550315856934, "learning_rate": 3.515931034482759e-06, "loss": 0.1548, "step": 96125 }, { "epoch": 0.95, "grad_norm": 21.441001892089844, "learning_rate": 3.5155e-06, "loss": 0.3366, "step": 96150 }, { "epoch": 0.95, "grad_norm": 3.7849326133728027, "learning_rate": 3.5150689655172415e-06, "loss": 0.211, "step": 96175 }, { "epoch": 0.95, "grad_norm": 11.264387130737305, "learning_rate": 3.514637931034483e-06, "loss": 0.3592, "step": 96200 }, { "epoch": 0.95, "grad_norm": 8.27861213684082, "learning_rate": 3.5142068965517244e-06, "loss": 0.1818, "step": 96225 }, { "epoch": 0.95, "grad_norm": 11.779680252075195, "learning_rate": 3.513775862068966e-06, "loss": 0.3506, "step": 96250 }, { "epoch": 0.95, "grad_norm": 11.051738739013672, "learning_rate": 3.513344827586207e-06, "loss": 0.2345, "step": 96275 }, { "epoch": 0.95, "grad_norm": 19.102703094482422, "learning_rate": 3.5129137931034484e-06, "loss": 0.3924, "step": 96300 }, { "epoch": 0.95, "grad_norm": 7.57668399810791, "learning_rate": 3.51248275862069e-06, "loss": 0.2081, "step": 96325 }, { "epoch": 0.95, "grad_norm": 18.143569946289062, "learning_rate": 3.5120517241379317e-06, "loss": 0.3278, "step": 96350 }, { "epoch": 0.95, "grad_norm": 12.641730308532715, "learning_rate": 3.5116206896551723e-06, "loss": 0.2484, "step": 96375 }, { "epoch": 0.95, "grad_norm": 4.774481296539307, "learning_rate": 3.5111896551724138e-06, "loss": 0.3428, "step": 96400 }, { "epoch": 0.95, "grad_norm": 14.592556953430176, "learning_rate": 3.5107586206896557e-06, "loss": 0.2592, "step": 96425 }, { "epoch": 0.95, "grad_norm": 12.424602508544922, "learning_rate": 3.510327586206897e-06, "loss": 0.3741, "step": 96450 }, { "epoch": 0.95, "grad_norm": 8.20738697052002, "learning_rate": 3.5098965517241386e-06, "loss": 0.1857, "step": 96475 }, { "epoch": 0.95, "grad_norm": 14.539521217346191, "learning_rate": 3.5094655172413796e-06, "loss": 0.3021, "step": 96500 }, { "epoch": 0.95, "grad_norm": 20.07499122619629, "learning_rate": 3.509034482758621e-06, "loss": 0.1997, "step": 96525 }, { "epoch": 0.95, "grad_norm": 10.253410339355469, "learning_rate": 3.5086034482758625e-06, "loss": 0.3642, "step": 96550 }, { "epoch": 0.95, "grad_norm": 10.625567436218262, "learning_rate": 3.508172413793104e-06, "loss": 0.2348, "step": 96575 }, { "epoch": 0.95, "grad_norm": 22.040550231933594, "learning_rate": 3.507741379310345e-06, "loss": 0.3521, "step": 96600 }, { "epoch": 0.95, "grad_norm": 10.100386619567871, "learning_rate": 3.5073103448275865e-06, "loss": 0.1834, "step": 96625 }, { "epoch": 0.95, "grad_norm": 15.537674903869629, "learning_rate": 3.506879310344828e-06, "loss": 0.3611, "step": 96650 }, { "epoch": 0.95, "grad_norm": 6.240860462188721, "learning_rate": 3.5064482758620694e-06, "loss": 0.1734, "step": 96675 }, { "epoch": 0.95, "grad_norm": 15.730910301208496, "learning_rate": 3.506017241379311e-06, "loss": 0.3121, "step": 96700 }, { "epoch": 0.95, "grad_norm": 9.6099853515625, "learning_rate": 3.505586206896552e-06, "loss": 0.2436, "step": 96725 }, { "epoch": 0.95, "grad_norm": 48.37184143066406, "learning_rate": 3.5051551724137933e-06, "loss": 0.3879, "step": 96750 }, { "epoch": 0.95, "grad_norm": 10.65452766418457, "learning_rate": 3.5047241379310348e-06, "loss": 0.1822, "step": 96775 }, { "epoch": 0.95, "grad_norm": 14.942939758300781, "learning_rate": 3.5042931034482762e-06, "loss": 0.4395, "step": 96800 }, { "epoch": 0.95, "grad_norm": 5.719793796539307, "learning_rate": 3.5038620689655173e-06, "loss": 0.1824, "step": 96825 }, { "epoch": 0.95, "grad_norm": 18.95462417602539, "learning_rate": 3.5034310344827587e-06, "loss": 0.3646, "step": 96850 }, { "epoch": 0.95, "grad_norm": 12.632521629333496, "learning_rate": 3.503e-06, "loss": 0.1751, "step": 96875 }, { "epoch": 0.95, "grad_norm": 22.015377044677734, "learning_rate": 3.5025689655172416e-06, "loss": 0.3942, "step": 96900 }, { "epoch": 0.95, "grad_norm": 4.580118656158447, "learning_rate": 3.502137931034483e-06, "loss": 0.1808, "step": 96925 }, { "epoch": 0.95, "grad_norm": 6.278651237487793, "learning_rate": 3.501706896551724e-06, "loss": 0.3686, "step": 96950 }, { "epoch": 0.95, "grad_norm": 10.076936721801758, "learning_rate": 3.5012758620689656e-06, "loss": 0.2074, "step": 96975 }, { "epoch": 0.95, "grad_norm": 18.108108520507812, "learning_rate": 3.5008448275862075e-06, "loss": 0.3313, "step": 97000 }, { "epoch": 0.95, "grad_norm": 11.831382751464844, "learning_rate": 3.500413793103449e-06, "loss": 0.1899, "step": 97025 }, { "epoch": 0.95, "grad_norm": 14.689850807189941, "learning_rate": 3.4999827586206895e-06, "loss": 0.3308, "step": 97050 }, { "epoch": 0.95, "grad_norm": 12.591190338134766, "learning_rate": 3.4995517241379314e-06, "loss": 0.2183, "step": 97075 }, { "epoch": 0.96, "grad_norm": 20.835203170776367, "learning_rate": 3.499120689655173e-06, "loss": 0.3254, "step": 97100 }, { "epoch": 0.96, "grad_norm": 10.902117729187012, "learning_rate": 3.4986896551724143e-06, "loss": 0.2184, "step": 97125 }, { "epoch": 0.96, "grad_norm": 19.63136100769043, "learning_rate": 3.4982586206896558e-06, "loss": 0.3868, "step": 97150 }, { "epoch": 0.96, "grad_norm": 10.009188652038574, "learning_rate": 3.497827586206897e-06, "loss": 0.2188, "step": 97175 }, { "epoch": 0.96, "grad_norm": 16.200408935546875, "learning_rate": 3.4973965517241383e-06, "loss": 0.3518, "step": 97200 }, { "epoch": 0.96, "grad_norm": 7.388946056365967, "learning_rate": 3.4969655172413797e-06, "loss": 0.1982, "step": 97225 }, { "epoch": 0.96, "grad_norm": 23.829389572143555, "learning_rate": 3.496534482758621e-06, "loss": 0.3627, "step": 97250 }, { "epoch": 0.96, "grad_norm": 8.959195137023926, "learning_rate": 3.496103448275862e-06, "loss": 0.2136, "step": 97275 }, { "epoch": 0.96, "grad_norm": 14.50963306427002, "learning_rate": 3.4956724137931037e-06, "loss": 0.353, "step": 97300 }, { "epoch": 0.96, "grad_norm": 7.829252243041992, "learning_rate": 3.495241379310345e-06, "loss": 0.208, "step": 97325 }, { "epoch": 0.96, "grad_norm": 14.387444496154785, "learning_rate": 3.4948103448275866e-06, "loss": 0.4189, "step": 97350 }, { "epoch": 0.96, "grad_norm": 11.07213020324707, "learning_rate": 3.494379310344828e-06, "loss": 0.2347, "step": 97375 }, { "epoch": 0.96, "grad_norm": 10.067138671875, "learning_rate": 3.493948275862069e-06, "loss": 0.4029, "step": 97400 }, { "epoch": 0.96, "grad_norm": 8.502622604370117, "learning_rate": 3.4935172413793105e-06, "loss": 0.2219, "step": 97425 }, { "epoch": 0.96, "grad_norm": 11.865537643432617, "learning_rate": 3.493086206896552e-06, "loss": 0.3727, "step": 97450 }, { "epoch": 0.96, "grad_norm": 3.8228578567504883, "learning_rate": 3.4926551724137934e-06, "loss": 0.1996, "step": 97475 }, { "epoch": 0.96, "grad_norm": 15.727027893066406, "learning_rate": 3.4922241379310345e-06, "loss": 0.3024, "step": 97500 }, { "epoch": 0.96, "grad_norm": 7.1656813621521, "learning_rate": 3.491793103448276e-06, "loss": 0.2239, "step": 97525 }, { "epoch": 0.96, "grad_norm": 10.92082691192627, "learning_rate": 3.4913620689655174e-06, "loss": 0.3128, "step": 97550 }, { "epoch": 0.96, "grad_norm": 9.047375679016113, "learning_rate": 3.4909310344827593e-06, "loss": 0.2255, "step": 97575 }, { "epoch": 0.96, "grad_norm": 17.881214141845703, "learning_rate": 3.4905000000000007e-06, "loss": 0.3757, "step": 97600 }, { "epoch": 0.96, "grad_norm": 8.472467422485352, "learning_rate": 3.4900689655172413e-06, "loss": 0.2087, "step": 97625 }, { "epoch": 0.96, "grad_norm": 16.475814819335938, "learning_rate": 3.489637931034483e-06, "loss": 0.3099, "step": 97650 }, { "epoch": 0.96, "grad_norm": 7.907323837280273, "learning_rate": 3.4892068965517247e-06, "loss": 0.2446, "step": 97675 }, { "epoch": 0.96, "grad_norm": 22.41193389892578, "learning_rate": 3.488775862068966e-06, "loss": 0.3598, "step": 97700 }, { "epoch": 0.96, "grad_norm": 12.485090255737305, "learning_rate": 3.488344827586207e-06, "loss": 0.191, "step": 97725 }, { "epoch": 0.96, "grad_norm": 9.287330627441406, "learning_rate": 3.4879137931034486e-06, "loss": 0.3637, "step": 97750 }, { "epoch": 0.96, "grad_norm": 8.11060619354248, "learning_rate": 3.48748275862069e-06, "loss": 0.1711, "step": 97775 }, { "epoch": 0.96, "grad_norm": 19.82415199279785, "learning_rate": 3.4870517241379315e-06, "loss": 0.3559, "step": 97800 }, { "epoch": 0.96, "grad_norm": 20.461193084716797, "learning_rate": 3.486620689655173e-06, "loss": 0.1993, "step": 97825 }, { "epoch": 0.96, "grad_norm": 15.001440048217773, "learning_rate": 3.486189655172414e-06, "loss": 0.3284, "step": 97850 }, { "epoch": 0.96, "grad_norm": 10.34247875213623, "learning_rate": 3.4857586206896555e-06, "loss": 0.2554, "step": 97875 }, { "epoch": 0.96, "grad_norm": 14.779970169067383, "learning_rate": 3.485327586206897e-06, "loss": 0.3664, "step": 97900 }, { "epoch": 0.96, "grad_norm": 11.481099128723145, "learning_rate": 3.4848965517241384e-06, "loss": 0.2094, "step": 97925 }, { "epoch": 0.96, "grad_norm": 22.832101821899414, "learning_rate": 3.4844655172413794e-06, "loss": 0.3575, "step": 97950 }, { "epoch": 0.96, "grad_norm": 9.126255989074707, "learning_rate": 3.484034482758621e-06, "loss": 0.207, "step": 97975 }, { "epoch": 0.96, "grad_norm": 12.001483917236328, "learning_rate": 3.4836034482758623e-06, "loss": 0.372, "step": 98000 }, { "epoch": 0.96, "grad_norm": 9.653796195983887, "learning_rate": 3.4831724137931038e-06, "loss": 0.2383, "step": 98025 }, { "epoch": 0.96, "grad_norm": 14.96887493133545, "learning_rate": 3.482741379310345e-06, "loss": 0.3699, "step": 98050 }, { "epoch": 0.96, "grad_norm": 13.636832237243652, "learning_rate": 3.4823103448275863e-06, "loss": 0.2385, "step": 98075 }, { "epoch": 0.96, "grad_norm": 17.725936889648438, "learning_rate": 3.4818793103448277e-06, "loss": 0.3603, "step": 98100 }, { "epoch": 0.97, "grad_norm": 9.443355560302734, "learning_rate": 3.481448275862069e-06, "loss": 0.2248, "step": 98125 }, { "epoch": 0.97, "grad_norm": 22.427444458007812, "learning_rate": 3.4810172413793106e-06, "loss": 0.3761, "step": 98150 }, { "epoch": 0.97, "grad_norm": 8.722413063049316, "learning_rate": 3.4805862068965517e-06, "loss": 0.1997, "step": 98175 }, { "epoch": 0.97, "grad_norm": 26.327648162841797, "learning_rate": 3.480155172413793e-06, "loss": 0.3676, "step": 98200 }, { "epoch": 0.97, "grad_norm": 5.923524379730225, "learning_rate": 3.479724137931035e-06, "loss": 0.2142, "step": 98225 }, { "epoch": 0.97, "grad_norm": 22.272573471069336, "learning_rate": 3.4792931034482765e-06, "loss": 0.3414, "step": 98250 }, { "epoch": 0.97, "grad_norm": 9.254578590393066, "learning_rate": 3.478862068965517e-06, "loss": 0.2192, "step": 98275 }, { "epoch": 0.97, "grad_norm": 9.78604507446289, "learning_rate": 3.478431034482759e-06, "loss": 0.3581, "step": 98300 }, { "epoch": 0.97, "grad_norm": 8.668484687805176, "learning_rate": 3.4780000000000004e-06, "loss": 0.2121, "step": 98325 }, { "epoch": 0.97, "grad_norm": 10.860576629638672, "learning_rate": 3.477568965517242e-06, "loss": 0.3664, "step": 98350 }, { "epoch": 0.97, "grad_norm": 7.438033103942871, "learning_rate": 3.4771379310344833e-06, "loss": 0.219, "step": 98375 }, { "epoch": 0.97, "grad_norm": 9.992013931274414, "learning_rate": 3.4767068965517244e-06, "loss": 0.3354, "step": 98400 }, { "epoch": 0.97, "grad_norm": 7.08281946182251, "learning_rate": 3.476275862068966e-06, "loss": 0.1574, "step": 98425 }, { "epoch": 0.97, "grad_norm": 14.947718620300293, "learning_rate": 3.4758448275862073e-06, "loss": 0.4238, "step": 98450 }, { "epoch": 0.97, "grad_norm": 10.978819847106934, "learning_rate": 3.4754137931034487e-06, "loss": 0.1756, "step": 98475 }, { "epoch": 0.97, "grad_norm": 20.479047775268555, "learning_rate": 3.4749827586206898e-06, "loss": 0.3169, "step": 98500 }, { "epoch": 0.97, "grad_norm": 5.305394172668457, "learning_rate": 3.4745517241379312e-06, "loss": 0.2062, "step": 98525 }, { "epoch": 0.97, "grad_norm": 12.137192726135254, "learning_rate": 3.4741206896551727e-06, "loss": 0.3542, "step": 98550 }, { "epoch": 0.97, "grad_norm": 9.841253280639648, "learning_rate": 3.473689655172414e-06, "loss": 0.2038, "step": 98575 }, { "epoch": 0.97, "grad_norm": 16.00995445251465, "learning_rate": 3.4732586206896556e-06, "loss": 0.3848, "step": 98600 }, { "epoch": 0.97, "grad_norm": 14.617993354797363, "learning_rate": 3.4728275862068966e-06, "loss": 0.2127, "step": 98625 }, { "epoch": 0.97, "grad_norm": 13.439397811889648, "learning_rate": 3.472396551724138e-06, "loss": 0.3251, "step": 98650 }, { "epoch": 0.97, "grad_norm": 10.721044540405273, "learning_rate": 3.4719655172413795e-06, "loss": 0.2085, "step": 98675 }, { "epoch": 0.97, "grad_norm": 20.076316833496094, "learning_rate": 3.471534482758621e-06, "loss": 0.3604, "step": 98700 }, { "epoch": 0.97, "grad_norm": 11.953937530517578, "learning_rate": 3.471103448275862e-06, "loss": 0.1695, "step": 98725 }, { "epoch": 0.97, "grad_norm": 23.771028518676758, "learning_rate": 3.4706724137931035e-06, "loss": 0.3691, "step": 98750 }, { "epoch": 0.97, "grad_norm": 11.399702072143555, "learning_rate": 3.470241379310345e-06, "loss": 0.2195, "step": 98775 }, { "epoch": 0.97, "grad_norm": 18.383590698242188, "learning_rate": 3.469810344827587e-06, "loss": 0.3428, "step": 98800 }, { "epoch": 0.97, "grad_norm": 8.122711181640625, "learning_rate": 3.4693793103448283e-06, "loss": 0.2256, "step": 98825 }, { "epoch": 0.97, "grad_norm": 24.066675186157227, "learning_rate": 3.468948275862069e-06, "loss": 0.4035, "step": 98850 }, { "epoch": 0.97, "grad_norm": 8.102073669433594, "learning_rate": 3.4685172413793108e-06, "loss": 0.1811, "step": 98875 }, { "epoch": 0.97, "grad_norm": 16.89650535583496, "learning_rate": 3.4680862068965522e-06, "loss": 0.3799, "step": 98900 }, { "epoch": 0.97, "grad_norm": 4.750442028045654, "learning_rate": 3.4676551724137937e-06, "loss": 0.1932, "step": 98925 }, { "epoch": 0.97, "grad_norm": 22.531160354614258, "learning_rate": 3.4672241379310347e-06, "loss": 0.3611, "step": 98950 }, { "epoch": 0.97, "grad_norm": 6.721494197845459, "learning_rate": 3.466793103448276e-06, "loss": 0.193, "step": 98975 }, { "epoch": 0.97, "grad_norm": 19.24203872680664, "learning_rate": 3.4663620689655176e-06, "loss": 0.3795, "step": 99000 }, { "epoch": 0.97, "grad_norm": 4.070175647735596, "learning_rate": 3.465931034482759e-06, "loss": 0.1964, "step": 99025 }, { "epoch": 0.97, "grad_norm": 17.897754669189453, "learning_rate": 3.4655000000000005e-06, "loss": 0.3428, "step": 99050 }, { "epoch": 0.97, "grad_norm": 40.59501647949219, "learning_rate": 3.4650689655172416e-06, "loss": 0.2046, "step": 99075 }, { "epoch": 0.97, "grad_norm": 16.689208984375, "learning_rate": 3.464637931034483e-06, "loss": 0.3314, "step": 99100 }, { "epoch": 0.97, "grad_norm": 9.564162254333496, "learning_rate": 3.4642068965517245e-06, "loss": 0.1682, "step": 99125 }, { "epoch": 0.98, "grad_norm": 15.16122055053711, "learning_rate": 3.463775862068966e-06, "loss": 0.4263, "step": 99150 }, { "epoch": 0.98, "grad_norm": 12.394766807556152, "learning_rate": 3.463344827586207e-06, "loss": 0.1972, "step": 99175 }, { "epoch": 0.98, "grad_norm": 15.801996231079102, "learning_rate": 3.4629137931034484e-06, "loss": 0.3081, "step": 99200 }, { "epoch": 0.98, "grad_norm": 7.3680219650268555, "learning_rate": 3.46248275862069e-06, "loss": 0.2574, "step": 99225 }, { "epoch": 0.98, "grad_norm": 6.200817108154297, "learning_rate": 3.4620517241379313e-06, "loss": 0.3205, "step": 99250 }, { "epoch": 0.98, "grad_norm": 10.70809268951416, "learning_rate": 3.461620689655173e-06, "loss": 0.2207, "step": 99275 }, { "epoch": 0.98, "grad_norm": 11.564445495605469, "learning_rate": 3.461189655172414e-06, "loss": 0.3355, "step": 99300 }, { "epoch": 0.98, "grad_norm": 7.8250508308410645, "learning_rate": 3.4607586206896553e-06, "loss": 0.1948, "step": 99325 }, { "epoch": 0.98, "grad_norm": 20.98024559020996, "learning_rate": 3.4603275862068967e-06, "loss": 0.3141, "step": 99350 }, { "epoch": 0.98, "grad_norm": 10.924944877624512, "learning_rate": 3.4598965517241386e-06, "loss": 0.1889, "step": 99375 }, { "epoch": 0.98, "grad_norm": 9.292162895202637, "learning_rate": 3.4594655172413792e-06, "loss": 0.3241, "step": 99400 }, { "epoch": 0.98, "grad_norm": 11.443410873413086, "learning_rate": 3.4590344827586207e-06, "loss": 0.2052, "step": 99425 }, { "epoch": 0.98, "grad_norm": 20.778932571411133, "learning_rate": 3.4586034482758626e-06, "loss": 0.3706, "step": 99450 }, { "epoch": 0.98, "grad_norm": 15.394172668457031, "learning_rate": 3.458172413793104e-06, "loss": 0.2154, "step": 99475 }, { "epoch": 0.98, "grad_norm": 30.179964065551758, "learning_rate": 3.4577413793103455e-06, "loss": 0.3635, "step": 99500 }, { "epoch": 0.98, "grad_norm": 10.032708168029785, "learning_rate": 3.4573103448275865e-06, "loss": 0.205, "step": 99525 }, { "epoch": 0.98, "grad_norm": 13.031989097595215, "learning_rate": 3.456879310344828e-06, "loss": 0.3957, "step": 99550 }, { "epoch": 0.98, "grad_norm": 5.926522254943848, "learning_rate": 3.4564482758620694e-06, "loss": 0.1852, "step": 99575 }, { "epoch": 0.98, "grad_norm": 21.004066467285156, "learning_rate": 3.456017241379311e-06, "loss": 0.4114, "step": 99600 }, { "epoch": 0.98, "grad_norm": 2.9540457725524902, "learning_rate": 3.455586206896552e-06, "loss": 0.1702, "step": 99625 }, { "epoch": 0.98, "grad_norm": 15.53557014465332, "learning_rate": 3.4551551724137934e-06, "loss": 0.3489, "step": 99650 }, { "epoch": 0.98, "grad_norm": 8.126785278320312, "learning_rate": 3.454724137931035e-06, "loss": 0.2119, "step": 99675 }, { "epoch": 0.98, "grad_norm": 21.087783813476562, "learning_rate": 3.4542931034482763e-06, "loss": 0.3802, "step": 99700 }, { "epoch": 0.98, "grad_norm": 9.34326171875, "learning_rate": 3.4538620689655177e-06, "loss": 0.2154, "step": 99725 }, { "epoch": 0.98, "grad_norm": 26.81128692626953, "learning_rate": 3.4534310344827588e-06, "loss": 0.3605, "step": 99750 }, { "epoch": 0.98, "grad_norm": 11.6315336227417, "learning_rate": 3.4530000000000002e-06, "loss": 0.1882, "step": 99775 }, { "epoch": 0.98, "grad_norm": 18.501142501831055, "learning_rate": 3.4525689655172417e-06, "loss": 0.3943, "step": 99800 }, { "epoch": 0.98, "grad_norm": 10.818281173706055, "learning_rate": 3.452137931034483e-06, "loss": 0.2095, "step": 99825 }, { "epoch": 0.98, "grad_norm": 18.139360427856445, "learning_rate": 3.451706896551724e-06, "loss": 0.3598, "step": 99850 }, { "epoch": 0.98, "grad_norm": 8.642694473266602, "learning_rate": 3.4512758620689656e-06, "loss": 0.1863, "step": 99875 }, { "epoch": 0.98, "grad_norm": 12.129343032836914, "learning_rate": 3.450844827586207e-06, "loss": 0.4322, "step": 99900 }, { "epoch": 0.98, "grad_norm": 7.606705665588379, "learning_rate": 3.4504137931034485e-06, "loss": 0.2321, "step": 99925 }, { "epoch": 0.98, "grad_norm": 20.041698455810547, "learning_rate": 3.44998275862069e-06, "loss": 0.4045, "step": 99950 }, { "epoch": 0.98, "grad_norm": 11.577198028564453, "learning_rate": 3.449551724137931e-06, "loss": 0.2172, "step": 99975 }, { "epoch": 0.98, "grad_norm": 10.000850677490234, "learning_rate": 3.4491206896551725e-06, "loss": 0.4086, "step": 100000 }, { "epoch": 0.98, "eval_loss": 0.4551270008087158, "eval_runtime": 5746.7065, "eval_samples_per_second": 1.647, "eval_steps_per_second": 0.206, "eval_wer": 0.1363548574003734, "step": 100000 }, { "epoch": 0.98, "grad_norm": 11.768099784851074, "learning_rate": 3.4486896551724144e-06, "loss": 0.1675, "step": 100025 }, { "epoch": 0.98, "grad_norm": 12.739967346191406, "learning_rate": 3.448258620689656e-06, "loss": 0.3343, "step": 100050 }, { "epoch": 0.98, "grad_norm": 9.446381568908691, "learning_rate": 3.4478275862068964e-06, "loss": 0.1895, "step": 100075 }, { "epoch": 0.98, "grad_norm": 18.847597122192383, "learning_rate": 3.4473965517241383e-06, "loss": 0.3219, "step": 100100 }, { "epoch": 0.98, "grad_norm": 11.460186958312988, "learning_rate": 3.4469655172413798e-06, "loss": 0.2207, "step": 100125 }, { "epoch": 0.99, "grad_norm": 17.93419647216797, "learning_rate": 3.4465344827586212e-06, "loss": 0.3598, "step": 100150 }, { "epoch": 0.99, "grad_norm": 9.247539520263672, "learning_rate": 3.4461034482758627e-06, "loss": 0.2144, "step": 100175 }, { "epoch": 0.99, "grad_norm": 17.808088302612305, "learning_rate": 3.4456724137931037e-06, "loss": 0.3156, "step": 100200 }, { "epoch": 0.99, "grad_norm": 11.912178039550781, "learning_rate": 3.445241379310345e-06, "loss": 0.2217, "step": 100225 }, { "epoch": 0.99, "grad_norm": 23.800453186035156, "learning_rate": 3.4448103448275866e-06, "loss": 0.373, "step": 100250 }, { "epoch": 0.99, "grad_norm": 7.8019700050354, "learning_rate": 3.444379310344828e-06, "loss": 0.2221, "step": 100275 }, { "epoch": 0.99, "grad_norm": 11.110395431518555, "learning_rate": 3.443948275862069e-06, "loss": 0.3169, "step": 100300 }, { "epoch": 0.99, "grad_norm": 11.17378044128418, "learning_rate": 3.4435172413793106e-06, "loss": 0.2389, "step": 100325 }, { "epoch": 0.99, "grad_norm": 13.659387588500977, "learning_rate": 3.443086206896552e-06, "loss": 0.3429, "step": 100350 }, { "epoch": 0.99, "grad_norm": 5.257768630981445, "learning_rate": 3.4426551724137935e-06, "loss": 0.226, "step": 100375 }, { "epoch": 0.99, "grad_norm": 18.369400024414062, "learning_rate": 3.4422241379310345e-06, "loss": 0.3827, "step": 100400 }, { "epoch": 0.99, "grad_norm": 11.020508766174316, "learning_rate": 3.441793103448276e-06, "loss": 0.2379, "step": 100425 }, { "epoch": 0.99, "grad_norm": 15.94006061553955, "learning_rate": 3.4413620689655174e-06, "loss": 0.3175, "step": 100450 }, { "epoch": 0.99, "grad_norm": 9.249139785766602, "learning_rate": 3.440931034482759e-06, "loss": 0.1992, "step": 100475 }, { "epoch": 0.99, "grad_norm": 15.41104793548584, "learning_rate": 3.4405000000000003e-06, "loss": 0.3845, "step": 100500 }, { "epoch": 0.99, "grad_norm": 9.70921516418457, "learning_rate": 3.4400689655172414e-06, "loss": 0.2055, "step": 100525 }, { "epoch": 0.99, "grad_norm": 21.990434646606445, "learning_rate": 3.439637931034483e-06, "loss": 0.3651, "step": 100550 }, { "epoch": 0.99, "grad_norm": 6.926893711090088, "learning_rate": 3.4392068965517243e-06, "loss": 0.2006, "step": 100575 }, { "epoch": 0.99, "grad_norm": 17.780546188354492, "learning_rate": 3.438775862068966e-06, "loss": 0.3496, "step": 100600 }, { "epoch": 0.99, "grad_norm": 5.768442153930664, "learning_rate": 3.4383448275862068e-06, "loss": 0.2323, "step": 100625 }, { "epoch": 0.99, "grad_norm": 15.10369873046875, "learning_rate": 3.4379137931034482e-06, "loss": 0.3353, "step": 100650 }, { "epoch": 0.99, "grad_norm": 6.088253498077393, "learning_rate": 3.43748275862069e-06, "loss": 0.1552, "step": 100675 }, { "epoch": 0.99, "grad_norm": 14.38453197479248, "learning_rate": 3.4370517241379316e-06, "loss": 0.3939, "step": 100700 }, { "epoch": 0.99, "grad_norm": 3.455209255218506, "learning_rate": 3.436620689655173e-06, "loss": 0.2349, "step": 100725 }, { "epoch": 0.99, "grad_norm": 19.239593505859375, "learning_rate": 3.436189655172414e-06, "loss": 0.3535, "step": 100750 }, { "epoch": 0.99, "grad_norm": 10.05458927154541, "learning_rate": 3.4357586206896555e-06, "loss": 0.2268, "step": 100775 }, { "epoch": 0.99, "grad_norm": 13.808792114257812, "learning_rate": 3.435327586206897e-06, "loss": 0.3454, "step": 100800 }, { "epoch": 0.99, "grad_norm": 7.7847580909729, "learning_rate": 3.4348965517241384e-06, "loss": 0.2112, "step": 100825 }, { "epoch": 0.99, "grad_norm": 15.525785446166992, "learning_rate": 3.4344655172413795e-06, "loss": 0.3408, "step": 100850 }, { "epoch": 0.99, "grad_norm": 11.629171371459961, "learning_rate": 3.434034482758621e-06, "loss": 0.2081, "step": 100875 }, { "epoch": 0.99, "grad_norm": 8.881190299987793, "learning_rate": 3.4336034482758624e-06, "loss": 0.364, "step": 100900 }, { "epoch": 0.99, "grad_norm": 7.981686592102051, "learning_rate": 3.433172413793104e-06, "loss": 0.1929, "step": 100925 }, { "epoch": 0.99, "grad_norm": 13.823870658874512, "learning_rate": 3.4327413793103453e-06, "loss": 0.3463, "step": 100950 }, { "epoch": 0.99, "grad_norm": 5.913155555725098, "learning_rate": 3.4323103448275863e-06, "loss": 0.2459, "step": 100975 }, { "epoch": 0.99, "grad_norm": 18.858259201049805, "learning_rate": 3.4318793103448278e-06, "loss": 0.3441, "step": 101000 }, { "epoch": 0.99, "grad_norm": 9.662789344787598, "learning_rate": 3.4314482758620692e-06, "loss": 0.2272, "step": 101025 }, { "epoch": 0.99, "grad_norm": 21.744794845581055, "learning_rate": 3.4310172413793107e-06, "loss": 0.4018, "step": 101050 }, { "epoch": 0.99, "grad_norm": 9.902578353881836, "learning_rate": 3.4305862068965517e-06, "loss": 0.1776, "step": 101075 }, { "epoch": 0.99, "grad_norm": 17.50059700012207, "learning_rate": 3.430155172413793e-06, "loss": 0.3011, "step": 101100 }, { "epoch": 0.99, "grad_norm": 6.5987982749938965, "learning_rate": 3.4297241379310346e-06, "loss": 0.1744, "step": 101125 }, { "epoch": 0.99, "grad_norm": 13.350449562072754, "learning_rate": 3.429293103448276e-06, "loss": 0.3034, "step": 101150 }, { "epoch": 1.0, "grad_norm": 7.672389030456543, "learning_rate": 3.4288620689655176e-06, "loss": 0.1962, "step": 101175 }, { "epoch": 1.0, "grad_norm": 22.011966705322266, "learning_rate": 3.4284310344827586e-06, "loss": 0.4072, "step": 101200 }, { "epoch": 1.0, "grad_norm": 2.628723382949829, "learning_rate": 3.428e-06, "loss": 0.2383, "step": 101225 }, { "epoch": 1.0, "grad_norm": 16.937978744506836, "learning_rate": 3.427568965517242e-06, "loss": 0.3362, "step": 101250 }, { "epoch": 1.0, "grad_norm": 9.742767333984375, "learning_rate": 3.4271379310344834e-06, "loss": 0.1845, "step": 101275 }, { "epoch": 1.0, "grad_norm": 11.575447082519531, "learning_rate": 3.426706896551724e-06, "loss": 0.358, "step": 101300 }, { "epoch": 1.0, "grad_norm": 5.774661540985107, "learning_rate": 3.426275862068966e-06, "loss": 0.197, "step": 101325 }, { "epoch": 1.0, "grad_norm": 19.33441162109375, "learning_rate": 3.4258448275862073e-06, "loss": 0.3763, "step": 101350 }, { "epoch": 1.0, "grad_norm": 8.095070838928223, "learning_rate": 3.4254137931034488e-06, "loss": 0.213, "step": 101375 }, { "epoch": 1.0, "grad_norm": 16.19361686706543, "learning_rate": 3.4249827586206902e-06, "loss": 0.3192, "step": 101400 }, { "epoch": 1.0, "grad_norm": 8.141512870788574, "learning_rate": 3.4245517241379313e-06, "loss": 0.2162, "step": 101425 }, { "epoch": 1.0, "grad_norm": 12.596837043762207, "learning_rate": 3.4241206896551727e-06, "loss": 0.3597, "step": 101450 }, { "epoch": 1.0, "grad_norm": 7.168547630310059, "learning_rate": 3.423689655172414e-06, "loss": 0.1793, "step": 101475 }, { "epoch": 1.0, "grad_norm": 16.362674713134766, "learning_rate": 3.4232586206896556e-06, "loss": 0.3793, "step": 101500 }, { "epoch": 1.0, "grad_norm": 10.554305076599121, "learning_rate": 3.4228275862068967e-06, "loss": 0.2362, "step": 101525 }, { "epoch": 1.0, "grad_norm": 18.117582321166992, "learning_rate": 3.4224137931034486e-06, "loss": 0.3101, "step": 101550 }, { "epoch": 1.0, "grad_norm": 10.836421966552734, "learning_rate": 3.42198275862069e-06, "loss": 0.1891, "step": 101575 }, { "epoch": 1.0, "grad_norm": 16.08357048034668, "learning_rate": 3.4215517241379315e-06, "loss": 0.3816, "step": 101600 }, { "epoch": 1.0, "grad_norm": 11.475577354431152, "learning_rate": 3.421120689655173e-06, "loss": 0.253, "step": 101625 }, { "epoch": 1.0, "grad_norm": 21.40730857849121, "learning_rate": 3.420689655172414e-06, "loss": 0.397, "step": 101650 }, { "epoch": 1.0, "grad_norm": 4.225343227386475, "learning_rate": 3.4202586206896554e-06, "loss": 0.2682, "step": 101675 }, { "epoch": 1.0, "grad_norm": 3.8249735832214355, "learning_rate": 3.419827586206897e-06, "loss": 0.1255, "step": 101700 }, { "epoch": 1.0, "grad_norm": 5.565564155578613, "learning_rate": 3.4193965517241383e-06, "loss": 0.2551, "step": 101725 }, { "epoch": 1.0, "grad_norm": 8.956576347351074, "learning_rate": 3.4189655172413794e-06, "loss": 0.1354, "step": 101750 }, { "epoch": 1.0, "grad_norm": 3.443147659301758, "learning_rate": 3.418534482758621e-06, "loss": 0.2475, "step": 101775 }, { "epoch": 1.0, "grad_norm": 9.677929878234863, "learning_rate": 3.4181034482758623e-06, "loss": 0.1524, "step": 101800 }, { "epoch": 1.0, "grad_norm": 4.7935709953308105, "learning_rate": 3.4176724137931037e-06, "loss": 0.2996, "step": 101825 }, { "epoch": 1.0, "grad_norm": 7.943664073944092, "learning_rate": 3.4172413793103448e-06, "loss": 0.1519, "step": 101850 }, { "epoch": 1.0, "grad_norm": 4.413126468658447, "learning_rate": 3.4168103448275862e-06, "loss": 0.3095, "step": 101875 }, { "epoch": 1.0, "grad_norm": 7.387980937957764, "learning_rate": 3.4163793103448277e-06, "loss": 0.1335, "step": 101900 }, { "epoch": 1.0, "grad_norm": 3.430388927459717, "learning_rate": 3.415948275862069e-06, "loss": 0.2966, "step": 101925 }, { "epoch": 1.0, "grad_norm": 4.315019607543945, "learning_rate": 3.415517241379311e-06, "loss": 0.124, "step": 101950 }, { "epoch": 1.0, "grad_norm": 6.2723493576049805, "learning_rate": 3.4150862068965516e-06, "loss": 0.2855, "step": 101975 }, { "epoch": 1.0, "grad_norm": 8.871296882629395, "learning_rate": 3.4146551724137935e-06, "loss": 0.1418, "step": 102000 }, { "epoch": 1.0, "grad_norm": 5.543117046356201, "learning_rate": 3.414224137931035e-06, "loss": 0.3593, "step": 102025 }, { "epoch": 1.0, "grad_norm": 12.284266471862793, "learning_rate": 3.4137931034482764e-06, "loss": 0.1209, "step": 102050 }, { "epoch": 1.0, "grad_norm": 4.593609809875488, "learning_rate": 3.4133620689655174e-06, "loss": 0.3435, "step": 102075 }, { "epoch": 1.0, "grad_norm": 8.85145092010498, "learning_rate": 3.412931034482759e-06, "loss": 0.1551, "step": 102100 }, { "epoch": 1.0, "grad_norm": 3.96225643157959, "learning_rate": 3.4125000000000004e-06, "loss": 0.2894, "step": 102125 }, { "epoch": 1.0, "grad_norm": 7.471660614013672, "learning_rate": 3.412068965517242e-06, "loss": 0.1694, "step": 102150 }, { "epoch": 1.0, "grad_norm": 3.1747143268585205, "learning_rate": 3.4116379310344833e-06, "loss": 0.2966, "step": 102175 }, { "epoch": 1.01, "grad_norm": 6.216019153594971, "learning_rate": 3.4112068965517243e-06, "loss": 0.1344, "step": 102200 }, { "epoch": 1.01, "grad_norm": 4.761691570281982, "learning_rate": 3.4107758620689658e-06, "loss": 0.2621, "step": 102225 }, { "epoch": 1.01, "grad_norm": 8.483795166015625, "learning_rate": 3.4103448275862072e-06, "loss": 0.1519, "step": 102250 }, { "epoch": 1.01, "grad_norm": 4.3860249519348145, "learning_rate": 3.4099137931034487e-06, "loss": 0.2771, "step": 102275 }, { "epoch": 1.01, "grad_norm": 6.087858200073242, "learning_rate": 3.4094827586206897e-06, "loss": 0.1399, "step": 102300 }, { "epoch": 1.01, "grad_norm": 5.679429054260254, "learning_rate": 3.409051724137931e-06, "loss": 0.3153, "step": 102325 }, { "epoch": 1.01, "grad_norm": 4.724513053894043, "learning_rate": 3.4086206896551726e-06, "loss": 0.1406, "step": 102350 }, { "epoch": 1.01, "grad_norm": 4.578142166137695, "learning_rate": 3.408189655172414e-06, "loss": 0.2975, "step": 102375 }, { "epoch": 1.01, "grad_norm": 5.359020709991455, "learning_rate": 3.4077586206896555e-06, "loss": 0.147, "step": 102400 }, { "epoch": 1.01, "grad_norm": 4.546832084655762, "learning_rate": 3.4073275862068966e-06, "loss": 0.3067, "step": 102425 }, { "epoch": 1.01, "grad_norm": 13.371234893798828, "learning_rate": 3.406896551724138e-06, "loss": 0.1336, "step": 102450 }, { "epoch": 1.01, "grad_norm": 4.679771900177002, "learning_rate": 3.4064655172413795e-06, "loss": 0.2735, "step": 102475 }, { "epoch": 1.01, "grad_norm": 9.251432418823242, "learning_rate": 3.406034482758621e-06, "loss": 0.134, "step": 102500 }, { "epoch": 1.01, "grad_norm": 4.5908098220825195, "learning_rate": 3.405603448275862e-06, "loss": 0.3318, "step": 102525 }, { "epoch": 1.01, "grad_norm": 5.001606464385986, "learning_rate": 3.4051724137931034e-06, "loss": 0.1387, "step": 102550 }, { "epoch": 1.01, "grad_norm": 4.875164985656738, "learning_rate": 3.4047413793103453e-06, "loss": 0.2992, "step": 102575 }, { "epoch": 1.01, "grad_norm": 1.2268571853637695, "learning_rate": 3.4043103448275868e-06, "loss": 0.165, "step": 102600 }, { "epoch": 1.01, "grad_norm": 3.4337499141693115, "learning_rate": 3.4038793103448282e-06, "loss": 0.246, "step": 102625 }, { "epoch": 1.01, "grad_norm": 7.67635440826416, "learning_rate": 3.4034482758620693e-06, "loss": 0.1427, "step": 102650 }, { "epoch": 1.01, "grad_norm": 5.115329265594482, "learning_rate": 3.4030172413793107e-06, "loss": 0.3171, "step": 102675 }, { "epoch": 1.01, "grad_norm": 8.094292640686035, "learning_rate": 3.402586206896552e-06, "loss": 0.1259, "step": 102700 }, { "epoch": 1.01, "grad_norm": 4.2693257331848145, "learning_rate": 3.4021551724137936e-06, "loss": 0.2439, "step": 102725 }, { "epoch": 1.01, "grad_norm": 11.182710647583008, "learning_rate": 3.4017241379310347e-06, "loss": 0.168, "step": 102750 }, { "epoch": 1.01, "grad_norm": 3.9961259365081787, "learning_rate": 3.401293103448276e-06, "loss": 0.3218, "step": 102775 }, { "epoch": 1.01, "grad_norm": 7.943589687347412, "learning_rate": 3.4008620689655176e-06, "loss": 0.1356, "step": 102800 }, { "epoch": 1.01, "grad_norm": 4.225972652435303, "learning_rate": 3.400431034482759e-06, "loss": 0.2829, "step": 102825 }, { "epoch": 1.01, "grad_norm": 7.802892208099365, "learning_rate": 3.4000000000000005e-06, "loss": 0.17, "step": 102850 }, { "epoch": 1.01, "grad_norm": 4.52353572845459, "learning_rate": 3.3995689655172415e-06, "loss": 0.3209, "step": 102875 }, { "epoch": 1.01, "grad_norm": 10.003146171569824, "learning_rate": 3.399137931034483e-06, "loss": 0.1953, "step": 102900 }, { "epoch": 1.01, "grad_norm": 6.36939001083374, "learning_rate": 3.3987068965517244e-06, "loss": 0.2275, "step": 102925 }, { "epoch": 1.01, "grad_norm": 4.670156478881836, "learning_rate": 3.398275862068966e-06, "loss": 0.1092, "step": 102950 }, { "epoch": 1.01, "grad_norm": 3.3930749893188477, "learning_rate": 3.397844827586207e-06, "loss": 0.2773, "step": 102975 }, { "epoch": 1.01, "grad_norm": 6.2586669921875, "learning_rate": 3.3974137931034484e-06, "loss": 0.1345, "step": 103000 }, { "epoch": 1.01, "grad_norm": 3.8697516918182373, "learning_rate": 3.39698275862069e-06, "loss": 0.2866, "step": 103025 }, { "epoch": 1.01, "grad_norm": 11.792309761047363, "learning_rate": 3.3965517241379313e-06, "loss": 0.1585, "step": 103050 }, { "epoch": 1.01, "grad_norm": 5.408161163330078, "learning_rate": 3.3961206896551727e-06, "loss": 0.288, "step": 103075 }, { "epoch": 1.01, "grad_norm": 13.397998809814453, "learning_rate": 3.3956896551724138e-06, "loss": 0.1265, "step": 103100 }, { "epoch": 1.01, "grad_norm": 4.699988842010498, "learning_rate": 3.3952586206896552e-06, "loss": 0.3127, "step": 103125 }, { "epoch": 1.01, "grad_norm": 5.717215538024902, "learning_rate": 3.3948275862068967e-06, "loss": 0.1452, "step": 103150 }, { "epoch": 1.01, "grad_norm": 3.7666208744049072, "learning_rate": 3.3943965517241386e-06, "loss": 0.2835, "step": 103175 }, { "epoch": 1.02, "grad_norm": 7.425102710723877, "learning_rate": 3.393965517241379e-06, "loss": 0.1519, "step": 103200 }, { "epoch": 1.02, "grad_norm": 4.6862874031066895, "learning_rate": 3.393534482758621e-06, "loss": 0.2908, "step": 103225 }, { "epoch": 1.02, "grad_norm": 5.591982841491699, "learning_rate": 3.3931034482758625e-06, "loss": 0.1542, "step": 103250 }, { "epoch": 1.02, "grad_norm": 5.855827808380127, "learning_rate": 3.392672413793104e-06, "loss": 0.3124, "step": 103275 }, { "epoch": 1.02, "grad_norm": 10.577664375305176, "learning_rate": 3.3922413793103454e-06, "loss": 0.1451, "step": 103300 }, { "epoch": 1.02, "grad_norm": 4.65044641494751, "learning_rate": 3.3918103448275865e-06, "loss": 0.2755, "step": 103325 }, { "epoch": 1.02, "grad_norm": 5.457197189331055, "learning_rate": 3.391379310344828e-06, "loss": 0.1537, "step": 103350 }, { "epoch": 1.02, "grad_norm": 4.750511169433594, "learning_rate": 3.3909482758620694e-06, "loss": 0.3054, "step": 103375 }, { "epoch": 1.02, "grad_norm": 10.184633255004883, "learning_rate": 3.390517241379311e-06, "loss": 0.1448, "step": 103400 }, { "epoch": 1.02, "grad_norm": 5.481217384338379, "learning_rate": 3.390086206896552e-06, "loss": 0.2544, "step": 103425 }, { "epoch": 1.02, "grad_norm": 12.885273933410645, "learning_rate": 3.3896551724137933e-06, "loss": 0.1482, "step": 103450 }, { "epoch": 1.02, "grad_norm": 3.8821210861206055, "learning_rate": 3.3892241379310348e-06, "loss": 0.2672, "step": 103475 }, { "epoch": 1.02, "grad_norm": 6.169131755828857, "learning_rate": 3.3887931034482762e-06, "loss": 0.1601, "step": 103500 }, { "epoch": 1.02, "grad_norm": 4.798482894897461, "learning_rate": 3.3883620689655177e-06, "loss": 0.3559, "step": 103525 }, { "epoch": 1.02, "grad_norm": 6.9809088706970215, "learning_rate": 3.3879310344827587e-06, "loss": 0.1409, "step": 103550 }, { "epoch": 1.02, "grad_norm": 4.169206142425537, "learning_rate": 3.3875e-06, "loss": 0.279, "step": 103575 }, { "epoch": 1.02, "grad_norm": 7.733280181884766, "learning_rate": 3.3870689655172416e-06, "loss": 0.1701, "step": 103600 }, { "epoch": 1.02, "grad_norm": 4.253648281097412, "learning_rate": 3.3866551724137935e-06, "loss": 0.2487, "step": 103625 }, { "epoch": 1.02, "grad_norm": 6.417905330657959, "learning_rate": 3.3862241379310345e-06, "loss": 0.1355, "step": 103650 }, { "epoch": 1.02, "grad_norm": 4.293128967285156, "learning_rate": 3.385793103448276e-06, "loss": 0.2785, "step": 103675 }, { "epoch": 1.02, "grad_norm": 5.139404296875, "learning_rate": 3.3853620689655175e-06, "loss": 0.1568, "step": 103700 }, { "epoch": 1.02, "grad_norm": 3.932647943496704, "learning_rate": 3.384931034482759e-06, "loss": 0.2468, "step": 103725 }, { "epoch": 1.02, "grad_norm": 7.165207862854004, "learning_rate": 3.3845e-06, "loss": 0.1363, "step": 103750 }, { "epoch": 1.02, "grad_norm": 5.2486419677734375, "learning_rate": 3.3840689655172414e-06, "loss": 0.2887, "step": 103775 }, { "epoch": 1.02, "grad_norm": 9.974930763244629, "learning_rate": 3.383637931034483e-06, "loss": 0.1485, "step": 103800 }, { "epoch": 1.02, "grad_norm": 5.755860805511475, "learning_rate": 3.3832068965517243e-06, "loss": 0.2933, "step": 103825 }, { "epoch": 1.02, "grad_norm": 7.807694911956787, "learning_rate": 3.382775862068966e-06, "loss": 0.153, "step": 103850 }, { "epoch": 1.02, "grad_norm": 4.641164302825928, "learning_rate": 3.382344827586207e-06, "loss": 0.2556, "step": 103875 }, { "epoch": 1.02, "grad_norm": 7.436587333679199, "learning_rate": 3.3819137931034483e-06, "loss": 0.1403, "step": 103900 }, { "epoch": 1.02, "grad_norm": 4.768685817718506, "learning_rate": 3.38148275862069e-06, "loss": 0.2976, "step": 103925 }, { "epoch": 1.02, "grad_norm": 9.565086364746094, "learning_rate": 3.3810517241379316e-06, "loss": 0.1314, "step": 103950 }, { "epoch": 1.02, "grad_norm": 3.7797317504882812, "learning_rate": 3.3806206896551726e-06, "loss": 0.2536, "step": 103975 }, { "epoch": 1.02, "grad_norm": 11.535787582397461, "learning_rate": 3.380189655172414e-06, "loss": 0.1383, "step": 104000 }, { "epoch": 1.02, "grad_norm": 7.325189113616943, "learning_rate": 3.3797586206896555e-06, "loss": 0.2588, "step": 104025 }, { "epoch": 1.02, "grad_norm": 24.37944984436035, "learning_rate": 3.379327586206897e-06, "loss": 0.1459, "step": 104050 }, { "epoch": 1.02, "grad_norm": 4.08526611328125, "learning_rate": 3.3788965517241385e-06, "loss": 0.2949, "step": 104075 }, { "epoch": 1.02, "grad_norm": 4.178027629852295, "learning_rate": 3.3784655172413795e-06, "loss": 0.1513, "step": 104100 }, { "epoch": 1.02, "grad_norm": 4.7601399421691895, "learning_rate": 3.378034482758621e-06, "loss": 0.25, "step": 104125 }, { "epoch": 1.02, "grad_norm": 6.491186618804932, "learning_rate": 3.3776034482758624e-06, "loss": 0.1679, "step": 104150 }, { "epoch": 1.02, "grad_norm": 3.753743886947632, "learning_rate": 3.377172413793104e-06, "loss": 0.3022, "step": 104175 }, { "epoch": 1.02, "grad_norm": 5.280703067779541, "learning_rate": 3.376741379310345e-06, "loss": 0.1466, "step": 104200 }, { "epoch": 1.03, "grad_norm": 3.5088846683502197, "learning_rate": 3.3763103448275864e-06, "loss": 0.2493, "step": 104225 }, { "epoch": 1.03, "grad_norm": 11.112874031066895, "learning_rate": 3.375879310344828e-06, "loss": 0.1462, "step": 104250 }, { "epoch": 1.03, "grad_norm": 5.269215106964111, "learning_rate": 3.3754482758620693e-06, "loss": 0.2768, "step": 104275 }, { "epoch": 1.03, "grad_norm": 9.965719223022461, "learning_rate": 3.3750172413793107e-06, "loss": 0.1582, "step": 104300 }, { "epoch": 1.03, "grad_norm": 5.097647190093994, "learning_rate": 3.3745862068965518e-06, "loss": 0.2577, "step": 104325 }, { "epoch": 1.03, "grad_norm": 11.805572509765625, "learning_rate": 3.3741551724137932e-06, "loss": 0.1506, "step": 104350 }, { "epoch": 1.03, "grad_norm": 4.878286361694336, "learning_rate": 3.3737241379310347e-06, "loss": 0.2628, "step": 104375 }, { "epoch": 1.03, "grad_norm": 8.456463813781738, "learning_rate": 3.373293103448276e-06, "loss": 0.1439, "step": 104400 }, { "epoch": 1.03, "grad_norm": 4.331110954284668, "learning_rate": 3.372862068965517e-06, "loss": 0.3565, "step": 104425 }, { "epoch": 1.03, "grad_norm": 5.919315338134766, "learning_rate": 3.3724310344827586e-06, "loss": 0.1352, "step": 104450 }, { "epoch": 1.03, "grad_norm": 4.097039222717285, "learning_rate": 3.372e-06, "loss": 0.2881, "step": 104475 }, { "epoch": 1.03, "grad_norm": 4.098126411437988, "learning_rate": 3.371568965517242e-06, "loss": 0.1187, "step": 104500 }, { "epoch": 1.03, "grad_norm": 4.124552249908447, "learning_rate": 3.3711379310344834e-06, "loss": 0.2863, "step": 104525 }, { "epoch": 1.03, "grad_norm": 14.136809349060059, "learning_rate": 3.370706896551724e-06, "loss": 0.1651, "step": 104550 }, { "epoch": 1.03, "grad_norm": 3.8565475940704346, "learning_rate": 3.370275862068966e-06, "loss": 0.3203, "step": 104575 }, { "epoch": 1.03, "grad_norm": 6.437408924102783, "learning_rate": 3.3698448275862074e-06, "loss": 0.1684, "step": 104600 }, { "epoch": 1.03, "grad_norm": 4.442893981933594, "learning_rate": 3.369413793103449e-06, "loss": 0.2787, "step": 104625 }, { "epoch": 1.03, "grad_norm": 2.9647462368011475, "learning_rate": 3.36898275862069e-06, "loss": 0.1548, "step": 104650 }, { "epoch": 1.03, "grad_norm": 5.502098560333252, "learning_rate": 3.3685517241379313e-06, "loss": 0.2531, "step": 104675 }, { "epoch": 1.03, "grad_norm": 8.065424919128418, "learning_rate": 3.3681206896551728e-06, "loss": 0.1599, "step": 104700 }, { "epoch": 1.03, "grad_norm": 4.326954364776611, "learning_rate": 3.3676896551724142e-06, "loss": 0.2418, "step": 104725 }, { "epoch": 1.03, "grad_norm": 10.649114608764648, "learning_rate": 3.3672586206896557e-06, "loss": 0.1579, "step": 104750 }, { "epoch": 1.03, "grad_norm": 4.497207164764404, "learning_rate": 3.3668275862068967e-06, "loss": 0.2699, "step": 104775 }, { "epoch": 1.03, "grad_norm": 13.650151252746582, "learning_rate": 3.366396551724138e-06, "loss": 0.127, "step": 104800 }, { "epoch": 1.03, "grad_norm": 4.800667762756348, "learning_rate": 3.3659655172413796e-06, "loss": 0.2826, "step": 104825 }, { "epoch": 1.03, "grad_norm": 5.070952892303467, "learning_rate": 3.365534482758621e-06, "loss": 0.1301, "step": 104850 }, { "epoch": 1.03, "grad_norm": 6.802369117736816, "learning_rate": 3.365103448275862e-06, "loss": 0.2988, "step": 104875 }, { "epoch": 1.03, "grad_norm": 7.4689507484436035, "learning_rate": 3.3646724137931036e-06, "loss": 0.1653, "step": 104900 }, { "epoch": 1.03, "grad_norm": 4.536859035491943, "learning_rate": 3.364241379310345e-06, "loss": 0.2725, "step": 104925 }, { "epoch": 1.03, "grad_norm": 8.541963577270508, "learning_rate": 3.3638103448275865e-06, "loss": 0.1705, "step": 104950 }, { "epoch": 1.03, "grad_norm": 5.37850284576416, "learning_rate": 3.363379310344828e-06, "loss": 0.3189, "step": 104975 }, { "epoch": 1.03, "grad_norm": 7.355737686157227, "learning_rate": 3.362948275862069e-06, "loss": 0.1494, "step": 105000 }, { "epoch": 1.03, "grad_norm": 5.339674949645996, "learning_rate": 3.3625172413793104e-06, "loss": 0.2566, "step": 105025 }, { "epoch": 1.03, "grad_norm": 7.758227348327637, "learning_rate": 3.362086206896552e-06, "loss": 0.1276, "step": 105050 }, { "epoch": 1.03, "grad_norm": 4.376338005065918, "learning_rate": 3.3616551724137938e-06, "loss": 0.2964, "step": 105075 }, { "epoch": 1.03, "grad_norm": 19.3480224609375, "learning_rate": 3.3612241379310344e-06, "loss": 0.1445, "step": 105100 }, { "epoch": 1.03, "grad_norm": 6.77165412902832, "learning_rate": 3.360793103448276e-06, "loss": 0.2878, "step": 105125 }, { "epoch": 1.03, "grad_norm": 6.415028095245361, "learning_rate": 3.3603620689655177e-06, "loss": 0.1613, "step": 105150 }, { "epoch": 1.03, "grad_norm": 4.529266357421875, "learning_rate": 3.359931034482759e-06, "loss": 0.3325, "step": 105175 }, { "epoch": 1.03, "grad_norm": 4.795090675354004, "learning_rate": 3.3595000000000006e-06, "loss": 0.1309, "step": 105200 }, { "epoch": 1.03, "grad_norm": 3.914883613586426, "learning_rate": 3.3590689655172416e-06, "loss": 0.2685, "step": 105225 }, { "epoch": 1.04, "grad_norm": 8.233759880065918, "learning_rate": 3.358637931034483e-06, "loss": 0.1586, "step": 105250 }, { "epoch": 1.04, "grad_norm": 5.380233287811279, "learning_rate": 3.3582068965517246e-06, "loss": 0.2859, "step": 105275 }, { "epoch": 1.04, "grad_norm": 7.86810827255249, "learning_rate": 3.357775862068966e-06, "loss": 0.1224, "step": 105300 }, { "epoch": 1.04, "grad_norm": 5.044600963592529, "learning_rate": 3.357344827586207e-06, "loss": 0.3271, "step": 105325 }, { "epoch": 1.04, "grad_norm": 30.30368423461914, "learning_rate": 3.3569137931034485e-06, "loss": 0.1357, "step": 105350 }, { "epoch": 1.04, "grad_norm": 4.819117546081543, "learning_rate": 3.35648275862069e-06, "loss": 0.2622, "step": 105375 }, { "epoch": 1.04, "grad_norm": 10.5061616897583, "learning_rate": 3.3560517241379314e-06, "loss": 0.1739, "step": 105400 }, { "epoch": 1.04, "grad_norm": 8.605958938598633, "learning_rate": 3.355620689655173e-06, "loss": 0.2986, "step": 105425 }, { "epoch": 1.04, "grad_norm": 6.62912130355835, "learning_rate": 3.355189655172414e-06, "loss": 0.1491, "step": 105450 }, { "epoch": 1.04, "grad_norm": 5.9563727378845215, "learning_rate": 3.3547586206896554e-06, "loss": 0.2773, "step": 105475 }, { "epoch": 1.04, "grad_norm": 7.063557147979736, "learning_rate": 3.354327586206897e-06, "loss": 0.1618, "step": 105500 }, { "epoch": 1.04, "grad_norm": 4.267956256866455, "learning_rate": 3.3538965517241383e-06, "loss": 0.2616, "step": 105525 }, { "epoch": 1.04, "grad_norm": 14.561707496643066, "learning_rate": 3.3534655172413793e-06, "loss": 0.1761, "step": 105550 }, { "epoch": 1.04, "grad_norm": 5.279040813446045, "learning_rate": 3.3530344827586208e-06, "loss": 0.3366, "step": 105575 }, { "epoch": 1.04, "grad_norm": 5.972095489501953, "learning_rate": 3.3526034482758622e-06, "loss": 0.1315, "step": 105600 }, { "epoch": 1.04, "grad_norm": 4.5820207595825195, "learning_rate": 3.352189655172414e-06, "loss": 0.328, "step": 105625 }, { "epoch": 1.04, "grad_norm": 5.076788902282715, "learning_rate": 3.351758620689655e-06, "loss": 0.157, "step": 105650 }, { "epoch": 1.04, "grad_norm": 4.092694282531738, "learning_rate": 3.3513275862068966e-06, "loss": 0.2931, "step": 105675 }, { "epoch": 1.04, "grad_norm": 10.920348167419434, "learning_rate": 3.350896551724138e-06, "loss": 0.1374, "step": 105700 }, { "epoch": 1.04, "grad_norm": 4.113173007965088, "learning_rate": 3.3504655172413795e-06, "loss": 0.2876, "step": 105725 }, { "epoch": 1.04, "grad_norm": 16.392229080200195, "learning_rate": 3.3500344827586214e-06, "loss": 0.1568, "step": 105750 }, { "epoch": 1.04, "grad_norm": 5.225580215454102, "learning_rate": 3.349603448275862e-06, "loss": 0.2735, "step": 105775 }, { "epoch": 1.04, "grad_norm": 10.635834693908691, "learning_rate": 3.3491724137931035e-06, "loss": 0.1453, "step": 105800 }, { "epoch": 1.04, "grad_norm": 4.248645782470703, "learning_rate": 3.3487413793103453e-06, "loss": 0.2673, "step": 105825 }, { "epoch": 1.04, "grad_norm": 7.4540581703186035, "learning_rate": 3.348310344827587e-06, "loss": 0.1662, "step": 105850 }, { "epoch": 1.04, "grad_norm": 5.833079814910889, "learning_rate": 3.3478793103448274e-06, "loss": 0.3057, "step": 105875 }, { "epoch": 1.04, "grad_norm": 8.89767074584961, "learning_rate": 3.3474482758620693e-06, "loss": 0.1352, "step": 105900 }, { "epoch": 1.04, "grad_norm": 5.671031951904297, "learning_rate": 3.3470172413793107e-06, "loss": 0.3223, "step": 105925 }, { "epoch": 1.04, "grad_norm": 7.347954273223877, "learning_rate": 3.346586206896552e-06, "loss": 0.0877, "step": 105950 }, { "epoch": 1.04, "grad_norm": 3.783900022506714, "learning_rate": 3.3461551724137937e-06, "loss": 0.3127, "step": 105975 }, { "epoch": 1.04, "grad_norm": 5.718499183654785, "learning_rate": 3.3457241379310347e-06, "loss": 0.1111, "step": 106000 }, { "epoch": 1.04, "grad_norm": 3.5925798416137695, "learning_rate": 3.345293103448276e-06, "loss": 0.2649, "step": 106025 }, { "epoch": 1.04, "grad_norm": 9.783110618591309, "learning_rate": 3.3448620689655176e-06, "loss": 0.1406, "step": 106050 }, { "epoch": 1.04, "grad_norm": 3.6397058963775635, "learning_rate": 3.344431034482759e-06, "loss": 0.2427, "step": 106075 }, { "epoch": 1.04, "grad_norm": 9.497664451599121, "learning_rate": 3.344e-06, "loss": 0.1497, "step": 106100 }, { "epoch": 1.04, "grad_norm": 6.380949974060059, "learning_rate": 3.3435689655172415e-06, "loss": 0.3092, "step": 106125 }, { "epoch": 1.04, "grad_norm": 10.85473918914795, "learning_rate": 3.343137931034483e-06, "loss": 0.1268, "step": 106150 }, { "epoch": 1.04, "grad_norm": 3.9928996562957764, "learning_rate": 3.3427068965517245e-06, "loss": 0.2403, "step": 106175 }, { "epoch": 1.04, "grad_norm": 10.325039863586426, "learning_rate": 3.342275862068966e-06, "loss": 0.1483, "step": 106200 }, { "epoch": 1.04, "grad_norm": 4.9225239753723145, "learning_rate": 3.341844827586207e-06, "loss": 0.2457, "step": 106225 }, { "epoch": 1.05, "grad_norm": 8.420709609985352, "learning_rate": 3.3414137931034484e-06, "loss": 0.1481, "step": 106250 }, { "epoch": 1.05, "grad_norm": 4.367515563964844, "learning_rate": 3.34098275862069e-06, "loss": 0.3139, "step": 106275 }, { "epoch": 1.05, "grad_norm": 4.32064151763916, "learning_rate": 3.3405517241379313e-06, "loss": 0.1213, "step": 106300 }, { "epoch": 1.05, "grad_norm": 6.856226921081543, "learning_rate": 3.3401206896551723e-06, "loss": 0.3185, "step": 106325 }, { "epoch": 1.05, "grad_norm": 3.661377191543579, "learning_rate": 3.339689655172414e-06, "loss": 0.1436, "step": 106350 }, { "epoch": 1.05, "grad_norm": 3.975986957550049, "learning_rate": 3.3392586206896553e-06, "loss": 0.217, "step": 106375 }, { "epoch": 1.05, "grad_norm": 13.079931259155273, "learning_rate": 3.338827586206897e-06, "loss": 0.1785, "step": 106400 }, { "epoch": 1.05, "grad_norm": 5.312600135803223, "learning_rate": 3.3383965517241386e-06, "loss": 0.308, "step": 106425 }, { "epoch": 1.05, "grad_norm": 6.401540756225586, "learning_rate": 3.337965517241379e-06, "loss": 0.1439, "step": 106450 }, { "epoch": 1.05, "grad_norm": 3.7105703353881836, "learning_rate": 3.337534482758621e-06, "loss": 0.3121, "step": 106475 }, { "epoch": 1.05, "grad_norm": 7.998950958251953, "learning_rate": 3.3371034482758625e-06, "loss": 0.1679, "step": 106500 }, { "epoch": 1.05, "grad_norm": 3.982758045196533, "learning_rate": 3.336672413793104e-06, "loss": 0.2839, "step": 106525 }, { "epoch": 1.05, "grad_norm": 8.415990829467773, "learning_rate": 3.336241379310345e-06, "loss": 0.1533, "step": 106550 }, { "epoch": 1.05, "grad_norm": 4.604515075683594, "learning_rate": 3.3358103448275865e-06, "loss": 0.3255, "step": 106575 }, { "epoch": 1.05, "grad_norm": 6.289758682250977, "learning_rate": 3.335379310344828e-06, "loss": 0.1232, "step": 106600 }, { "epoch": 1.05, "grad_norm": 5.206185340881348, "learning_rate": 3.3349482758620694e-06, "loss": 0.2939, "step": 106625 }, { "epoch": 1.05, "grad_norm": 6.395169734954834, "learning_rate": 3.334517241379311e-06, "loss": 0.1207, "step": 106650 }, { "epoch": 1.05, "grad_norm": 4.099924087524414, "learning_rate": 3.334086206896552e-06, "loss": 0.2901, "step": 106675 }, { "epoch": 1.05, "grad_norm": 8.008915901184082, "learning_rate": 3.3336551724137933e-06, "loss": 0.1402, "step": 106700 }, { "epoch": 1.05, "grad_norm": 3.735316038131714, "learning_rate": 3.333224137931035e-06, "loss": 0.2975, "step": 106725 }, { "epoch": 1.05, "grad_norm": 10.229997634887695, "learning_rate": 3.3327931034482763e-06, "loss": 0.1482, "step": 106750 }, { "epoch": 1.05, "grad_norm": 5.287796974182129, "learning_rate": 3.3323620689655173e-06, "loss": 0.2963, "step": 106775 }, { "epoch": 1.05, "grad_norm": 12.133255004882812, "learning_rate": 3.3319310344827587e-06, "loss": 0.139, "step": 106800 }, { "epoch": 1.05, "grad_norm": 7.077879428863525, "learning_rate": 3.3315e-06, "loss": 0.2945, "step": 106825 }, { "epoch": 1.05, "grad_norm": 14.72741413116455, "learning_rate": 3.3310689655172417e-06, "loss": 0.1341, "step": 106850 }, { "epoch": 1.05, "grad_norm": 5.954286575317383, "learning_rate": 3.330637931034483e-06, "loss": 0.3259, "step": 106875 }, { "epoch": 1.05, "grad_norm": 6.361596584320068, "learning_rate": 3.330206896551724e-06, "loss": 0.1609, "step": 106900 }, { "epoch": 1.05, "grad_norm": 4.023088455200195, "learning_rate": 3.3297758620689656e-06, "loss": 0.2431, "step": 106925 }, { "epoch": 1.05, "grad_norm": 6.115626335144043, "learning_rate": 3.329344827586207e-06, "loss": 0.1534, "step": 106950 }, { "epoch": 1.05, "grad_norm": 4.142479419708252, "learning_rate": 3.328913793103449e-06, "loss": 0.3181, "step": 106975 }, { "epoch": 1.05, "grad_norm": 8.6967191696167, "learning_rate": 3.3284827586206896e-06, "loss": 0.1159, "step": 107000 }, { "epoch": 1.05, "grad_norm": 3.8346738815307617, "learning_rate": 3.328051724137931e-06, "loss": 0.3144, "step": 107025 }, { "epoch": 1.05, "grad_norm": 6.5643696784973145, "learning_rate": 3.327620689655173e-06, "loss": 0.1312, "step": 107050 }, { "epoch": 1.05, "grad_norm": 4.290246963500977, "learning_rate": 3.3271896551724143e-06, "loss": 0.3237, "step": 107075 }, { "epoch": 1.05, "grad_norm": 5.633465766906738, "learning_rate": 3.326758620689656e-06, "loss": 0.1585, "step": 107100 }, { "epoch": 1.05, "grad_norm": 4.6048126220703125, "learning_rate": 3.326327586206897e-06, "loss": 0.3231, "step": 107125 }, { "epoch": 1.05, "grad_norm": 4.195301055908203, "learning_rate": 3.3258965517241383e-06, "loss": 0.1554, "step": 107150 }, { "epoch": 1.05, "grad_norm": 3.6123290061950684, "learning_rate": 3.3254655172413797e-06, "loss": 0.2959, "step": 107175 }, { "epoch": 1.05, "grad_norm": 10.184135437011719, "learning_rate": 3.325034482758621e-06, "loss": 0.1412, "step": 107200 }, { "epoch": 1.05, "grad_norm": 13.327254295349121, "learning_rate": 3.3246034482758622e-06, "loss": 0.3599, "step": 107225 }, { "epoch": 1.05, "grad_norm": 6.1508965492248535, "learning_rate": 3.3241724137931037e-06, "loss": 0.1421, "step": 107250 }, { "epoch": 1.06, "grad_norm": 4.339240074157715, "learning_rate": 3.323741379310345e-06, "loss": 0.3044, "step": 107275 }, { "epoch": 1.06, "grad_norm": 10.80823802947998, "learning_rate": 3.3233103448275866e-06, "loss": 0.1786, "step": 107300 }, { "epoch": 1.06, "grad_norm": 4.332066059112549, "learning_rate": 3.322879310344828e-06, "loss": 0.2685, "step": 107325 }, { "epoch": 1.06, "grad_norm": 5.307800769805908, "learning_rate": 3.322448275862069e-06, "loss": 0.1561, "step": 107350 }, { "epoch": 1.06, "grad_norm": 4.094848155975342, "learning_rate": 3.3220172413793106e-06, "loss": 0.301, "step": 107375 }, { "epoch": 1.06, "grad_norm": 12.472225189208984, "learning_rate": 3.321586206896552e-06, "loss": 0.1477, "step": 107400 }, { "epoch": 1.06, "grad_norm": 4.884403705596924, "learning_rate": 3.3211551724137935e-06, "loss": 0.226, "step": 107425 }, { "epoch": 1.06, "grad_norm": 14.094776153564453, "learning_rate": 3.3207241379310345e-06, "loss": 0.138, "step": 107450 }, { "epoch": 1.06, "grad_norm": 4.139034748077393, "learning_rate": 3.320293103448276e-06, "loss": 0.3319, "step": 107475 }, { "epoch": 1.06, "grad_norm": 11.906436920166016, "learning_rate": 3.3198620689655174e-06, "loss": 0.1313, "step": 107500 }, { "epoch": 1.06, "grad_norm": 5.372486114501953, "learning_rate": 3.319431034482759e-06, "loss": 0.3019, "step": 107525 }, { "epoch": 1.06, "grad_norm": 7.099095344543457, "learning_rate": 3.319e-06, "loss": 0.1288, "step": 107550 }, { "epoch": 1.06, "grad_norm": 4.263402938842773, "learning_rate": 3.3185689655172414e-06, "loss": 0.2704, "step": 107575 }, { "epoch": 1.06, "grad_norm": 9.697428703308105, "learning_rate": 3.318137931034483e-06, "loss": 0.1552, "step": 107600 }, { "epoch": 1.06, "grad_norm": 4.475773811340332, "learning_rate": 3.3177068965517247e-06, "loss": 0.2965, "step": 107625 }, { "epoch": 1.06, "grad_norm": 1.4874399900436401, "learning_rate": 3.317275862068966e-06, "loss": 0.1333, "step": 107650 }, { "epoch": 1.06, "grad_norm": 15.843009948730469, "learning_rate": 3.3168448275862068e-06, "loss": 0.2954, "step": 107675 }, { "epoch": 1.06, "grad_norm": 5.881358623504639, "learning_rate": 3.3164137931034486e-06, "loss": 0.148, "step": 107700 }, { "epoch": 1.06, "grad_norm": 5.791259765625, "learning_rate": 3.3160000000000005e-06, "loss": 0.2809, "step": 107725 }, { "epoch": 1.06, "grad_norm": 2.999204635620117, "learning_rate": 3.315568965517242e-06, "loss": 0.1676, "step": 107750 }, { "epoch": 1.06, "grad_norm": 4.210616111755371, "learning_rate": 3.3151379310344826e-06, "loss": 0.272, "step": 107775 }, { "epoch": 1.06, "grad_norm": 8.363523483276367, "learning_rate": 3.3147068965517245e-06, "loss": 0.175, "step": 107800 }, { "epoch": 1.06, "grad_norm": 5.106463432312012, "learning_rate": 3.314275862068966e-06, "loss": 0.2707, "step": 107825 }, { "epoch": 1.06, "grad_norm": 3.6408369541168213, "learning_rate": 3.3138448275862074e-06, "loss": 0.1242, "step": 107850 }, { "epoch": 1.06, "grad_norm": 3.734285593032837, "learning_rate": 3.313413793103449e-06, "loss": 0.265, "step": 107875 }, { "epoch": 1.06, "grad_norm": 7.327983856201172, "learning_rate": 3.31298275862069e-06, "loss": 0.1169, "step": 107900 }, { "epoch": 1.06, "grad_norm": 5.702559947967529, "learning_rate": 3.3125517241379313e-06, "loss": 0.3128, "step": 107925 }, { "epoch": 1.06, "grad_norm": 7.757970809936523, "learning_rate": 3.3121206896551728e-06, "loss": 0.1569, "step": 107950 }, { "epoch": 1.06, "grad_norm": 3.9189155101776123, "learning_rate": 3.3116896551724142e-06, "loss": 0.2481, "step": 107975 }, { "epoch": 1.06, "grad_norm": 7.453969478607178, "learning_rate": 3.3112586206896553e-06, "loss": 0.1772, "step": 108000 }, { "epoch": 1.06, "grad_norm": 4.31721305847168, "learning_rate": 3.3108275862068967e-06, "loss": 0.263, "step": 108025 }, { "epoch": 1.06, "grad_norm": 4.903803825378418, "learning_rate": 3.310396551724138e-06, "loss": 0.133, "step": 108050 }, { "epoch": 1.06, "grad_norm": 5.516169548034668, "learning_rate": 3.3099655172413796e-06, "loss": 0.278, "step": 108075 }, { "epoch": 1.06, "grad_norm": 17.45596694946289, "learning_rate": 3.309534482758621e-06, "loss": 0.1354, "step": 108100 }, { "epoch": 1.06, "grad_norm": 4.244462490081787, "learning_rate": 3.309103448275862e-06, "loss": 0.3115, "step": 108125 }, { "epoch": 1.06, "grad_norm": 7.778811454772949, "learning_rate": 3.3086724137931036e-06, "loss": 0.1675, "step": 108150 }, { "epoch": 1.06, "grad_norm": 4.557231903076172, "learning_rate": 3.308241379310345e-06, "loss": 0.3239, "step": 108175 }, { "epoch": 1.06, "grad_norm": 11.747157096862793, "learning_rate": 3.3078103448275865e-06, "loss": 0.1553, "step": 108200 }, { "epoch": 1.06, "grad_norm": 4.8908209800720215, "learning_rate": 3.3073793103448275e-06, "loss": 0.2964, "step": 108225 }, { "epoch": 1.06, "grad_norm": 3.5548508167266846, "learning_rate": 3.306948275862069e-06, "loss": 0.1354, "step": 108250 }, { "epoch": 1.06, "grad_norm": 4.747234344482422, "learning_rate": 3.3065172413793104e-06, "loss": 0.2819, "step": 108275 }, { "epoch": 1.07, "grad_norm": 9.021160125732422, "learning_rate": 3.3060862068965523e-06, "loss": 0.1438, "step": 108300 }, { "epoch": 1.07, "grad_norm": 5.510137557983398, "learning_rate": 3.3056551724137938e-06, "loss": 0.2773, "step": 108325 }, { "epoch": 1.07, "grad_norm": 9.818075180053711, "learning_rate": 3.3052241379310344e-06, "loss": 0.1667, "step": 108350 }, { "epoch": 1.07, "grad_norm": 4.023112773895264, "learning_rate": 3.3047931034482763e-06, "loss": 0.2952, "step": 108375 }, { "epoch": 1.07, "grad_norm": 8.203217506408691, "learning_rate": 3.3043620689655177e-06, "loss": 0.1469, "step": 108400 }, { "epoch": 1.07, "grad_norm": 5.165531158447266, "learning_rate": 3.303931034482759e-06, "loss": 0.2673, "step": 108425 }, { "epoch": 1.07, "grad_norm": 8.92123031616211, "learning_rate": 3.3035000000000002e-06, "loss": 0.1464, "step": 108450 }, { "epoch": 1.07, "grad_norm": 5.138284206390381, "learning_rate": 3.3030689655172417e-06, "loss": 0.2574, "step": 108475 }, { "epoch": 1.07, "grad_norm": 8.076075553894043, "learning_rate": 3.302637931034483e-06, "loss": 0.1228, "step": 108500 }, { "epoch": 1.07, "grad_norm": 5.9167890548706055, "learning_rate": 3.3022068965517246e-06, "loss": 0.292, "step": 108525 }, { "epoch": 1.07, "grad_norm": 10.755831718444824, "learning_rate": 3.301775862068966e-06, "loss": 0.1244, "step": 108550 }, { "epoch": 1.07, "grad_norm": 5.081029891967773, "learning_rate": 3.301344827586207e-06, "loss": 0.2497, "step": 108575 }, { "epoch": 1.07, "grad_norm": 10.65904426574707, "learning_rate": 3.3009137931034485e-06, "loss": 0.1524, "step": 108600 }, { "epoch": 1.07, "grad_norm": 5.538475513458252, "learning_rate": 3.30048275862069e-06, "loss": 0.2431, "step": 108625 }, { "epoch": 1.07, "grad_norm": 7.312997341156006, "learning_rate": 3.3000517241379314e-06, "loss": 0.136, "step": 108650 }, { "epoch": 1.07, "grad_norm": 4.57673454284668, "learning_rate": 3.2996206896551725e-06, "loss": 0.2246, "step": 108675 }, { "epoch": 1.07, "grad_norm": 9.183554649353027, "learning_rate": 3.299189655172414e-06, "loss": 0.1482, "step": 108700 }, { "epoch": 1.07, "grad_norm": 4.744228839874268, "learning_rate": 3.2987586206896554e-06, "loss": 0.3283, "step": 108725 }, { "epoch": 1.07, "grad_norm": 5.8321452140808105, "learning_rate": 3.298327586206897e-06, "loss": 0.1282, "step": 108750 }, { "epoch": 1.07, "grad_norm": 4.3183274269104, "learning_rate": 3.2978965517241383e-06, "loss": 0.2704, "step": 108775 }, { "epoch": 1.07, "grad_norm": 17.608488082885742, "learning_rate": 3.2974655172413793e-06, "loss": 0.1652, "step": 108800 }, { "epoch": 1.07, "grad_norm": 3.903271198272705, "learning_rate": 3.297034482758621e-06, "loss": 0.3076, "step": 108825 }, { "epoch": 1.07, "grad_norm": 8.130853652954102, "learning_rate": 3.2966034482758623e-06, "loss": 0.1943, "step": 108850 }, { "epoch": 1.07, "grad_norm": 3.4460575580596924, "learning_rate": 3.296172413793104e-06, "loss": 0.3098, "step": 108875 }, { "epoch": 1.07, "grad_norm": 7.937876224517822, "learning_rate": 3.2957413793103447e-06, "loss": 0.1475, "step": 108900 }, { "epoch": 1.07, "grad_norm": 4.763494968414307, "learning_rate": 3.295310344827586e-06, "loss": 0.2985, "step": 108925 }, { "epoch": 1.07, "grad_norm": 5.912958145141602, "learning_rate": 3.294879310344828e-06, "loss": 0.1491, "step": 108950 }, { "epoch": 1.07, "grad_norm": 3.9095091819763184, "learning_rate": 3.2944482758620695e-06, "loss": 0.2814, "step": 108975 }, { "epoch": 1.07, "grad_norm": 5.7295002937316895, "learning_rate": 3.29401724137931e-06, "loss": 0.1521, "step": 109000 }, { "epoch": 1.07, "grad_norm": 4.378574848175049, "learning_rate": 3.293586206896552e-06, "loss": 0.245, "step": 109025 }, { "epoch": 1.07, "grad_norm": 7.283492088317871, "learning_rate": 3.2931551724137935e-06, "loss": 0.1722, "step": 109050 }, { "epoch": 1.07, "grad_norm": 5.662230014801025, "learning_rate": 3.292724137931035e-06, "loss": 0.2997, "step": 109075 }, { "epoch": 1.07, "grad_norm": 6.529279708862305, "learning_rate": 3.2922931034482764e-06, "loss": 0.1598, "step": 109100 }, { "epoch": 1.07, "grad_norm": 4.11425256729126, "learning_rate": 3.2918620689655174e-06, "loss": 0.2248, "step": 109125 }, { "epoch": 1.07, "grad_norm": 5.108557224273682, "learning_rate": 3.291431034482759e-06, "loss": 0.111, "step": 109150 }, { "epoch": 1.07, "grad_norm": 3.5011582374572754, "learning_rate": 3.2910000000000003e-06, "loss": 0.2734, "step": 109175 }, { "epoch": 1.07, "grad_norm": 10.773645401000977, "learning_rate": 3.290568965517242e-06, "loss": 0.1673, "step": 109200 }, { "epoch": 1.07, "grad_norm": 5.193960189819336, "learning_rate": 3.290137931034483e-06, "loss": 0.3019, "step": 109225 }, { "epoch": 1.07, "grad_norm": 10.607759475708008, "learning_rate": 3.2897068965517243e-06, "loss": 0.1814, "step": 109250 }, { "epoch": 1.07, "grad_norm": 2.890691041946411, "learning_rate": 3.2892758620689657e-06, "loss": 0.2674, "step": 109275 }, { "epoch": 1.08, "grad_norm": 7.096671104431152, "learning_rate": 3.288844827586207e-06, "loss": 0.1668, "step": 109300 }, { "epoch": 1.08, "grad_norm": 4.524406433105469, "learning_rate": 3.2884137931034487e-06, "loss": 0.2624, "step": 109325 }, { "epoch": 1.08, "grad_norm": 12.519925117492676, "learning_rate": 3.2879827586206897e-06, "loss": 0.1565, "step": 109350 }, { "epoch": 1.08, "grad_norm": 7.30645751953125, "learning_rate": 3.287551724137931e-06, "loss": 0.2334, "step": 109375 }, { "epoch": 1.08, "grad_norm": 14.83720588684082, "learning_rate": 3.2871206896551726e-06, "loss": 0.1587, "step": 109400 }, { "epoch": 1.08, "grad_norm": 5.12529993057251, "learning_rate": 3.286689655172414e-06, "loss": 0.366, "step": 109425 }, { "epoch": 1.08, "grad_norm": 7.838972091674805, "learning_rate": 3.286258620689655e-06, "loss": 0.1438, "step": 109450 }, { "epoch": 1.08, "grad_norm": 4.393096446990967, "learning_rate": 3.2858275862068965e-06, "loss": 0.2501, "step": 109475 }, { "epoch": 1.08, "grad_norm": 7.280783176422119, "learning_rate": 3.285396551724138e-06, "loss": 0.135, "step": 109500 }, { "epoch": 1.08, "grad_norm": 4.166048049926758, "learning_rate": 3.28496551724138e-06, "loss": 0.2385, "step": 109525 }, { "epoch": 1.08, "grad_norm": 11.776299476623535, "learning_rate": 3.2845344827586213e-06, "loss": 0.1673, "step": 109550 }, { "epoch": 1.08, "grad_norm": 5.515748977661133, "learning_rate": 3.284103448275862e-06, "loss": 0.2893, "step": 109575 }, { "epoch": 1.08, "grad_norm": 14.286576271057129, "learning_rate": 3.283672413793104e-06, "loss": 0.1699, "step": 109600 }, { "epoch": 1.08, "grad_norm": 4.901423454284668, "learning_rate": 3.2832413793103453e-06, "loss": 0.3194, "step": 109625 }, { "epoch": 1.08, "grad_norm": 7.206299781799316, "learning_rate": 3.2828103448275867e-06, "loss": 0.1354, "step": 109650 }, { "epoch": 1.08, "grad_norm": 4.736725330352783, "learning_rate": 3.2823793103448278e-06, "loss": 0.2609, "step": 109675 }, { "epoch": 1.08, "grad_norm": 7.686342716217041, "learning_rate": 3.2819482758620692e-06, "loss": 0.122, "step": 109700 }, { "epoch": 1.08, "grad_norm": 4.759314060211182, "learning_rate": 3.281534482758621e-06, "loss": 0.2449, "step": 109725 }, { "epoch": 1.08, "grad_norm": 10.300215721130371, "learning_rate": 3.2811034482758626e-06, "loss": 0.1649, "step": 109750 }, { "epoch": 1.08, "grad_norm": 4.833341598510742, "learning_rate": 3.280672413793104e-06, "loss": 0.2694, "step": 109775 }, { "epoch": 1.08, "grad_norm": 11.031009674072266, "learning_rate": 3.280241379310345e-06, "loss": 0.1579, "step": 109800 }, { "epoch": 1.08, "grad_norm": 7.733720779418945, "learning_rate": 3.2798103448275865e-06, "loss": 0.2744, "step": 109825 }, { "epoch": 1.08, "grad_norm": 8.044781684875488, "learning_rate": 3.279379310344828e-06, "loss": 0.1508, "step": 109850 }, { "epoch": 1.08, "grad_norm": 5.13776159286499, "learning_rate": 3.2789482758620694e-06, "loss": 0.3233, "step": 109875 }, { "epoch": 1.08, "grad_norm": 10.936314582824707, "learning_rate": 3.2785172413793105e-06, "loss": 0.1494, "step": 109900 }, { "epoch": 1.08, "grad_norm": 4.439151763916016, "learning_rate": 3.278086206896552e-06, "loss": 0.3187, "step": 109925 }, { "epoch": 1.08, "grad_norm": 6.634314060211182, "learning_rate": 3.2776551724137934e-06, "loss": 0.1407, "step": 109950 }, { "epoch": 1.08, "grad_norm": 8.144579887390137, "learning_rate": 3.277224137931035e-06, "loss": 0.2918, "step": 109975 }, { "epoch": 1.08, "grad_norm": 6.573356628417969, "learning_rate": 3.2767931034482763e-06, "loss": 0.1286, "step": 110000 }, { "epoch": 1.08, "grad_norm": 4.544383525848389, "learning_rate": 3.2763620689655173e-06, "loss": 0.3544, "step": 110025 }, { "epoch": 1.08, "grad_norm": 2.8247523307800293, "learning_rate": 3.2759310344827588e-06, "loss": 0.1283, "step": 110050 }, { "epoch": 1.08, "grad_norm": 5.224421501159668, "learning_rate": 3.2755000000000002e-06, "loss": 0.333, "step": 110075 }, { "epoch": 1.08, "grad_norm": 6.0346760749816895, "learning_rate": 3.2750689655172417e-06, "loss": 0.1332, "step": 110100 }, { "epoch": 1.08, "grad_norm": 4.357372760772705, "learning_rate": 3.2746379310344827e-06, "loss": 0.2639, "step": 110125 }, { "epoch": 1.08, "grad_norm": 10.4007568359375, "learning_rate": 3.274206896551724e-06, "loss": 0.1155, "step": 110150 }, { "epoch": 1.08, "grad_norm": 4.227727890014648, "learning_rate": 3.2737758620689656e-06, "loss": 0.2756, "step": 110175 }, { "epoch": 1.08, "grad_norm": 10.847373962402344, "learning_rate": 3.2733448275862075e-06, "loss": 0.1805, "step": 110200 }, { "epoch": 1.08, "grad_norm": 5.110375881195068, "learning_rate": 3.272913793103449e-06, "loss": 0.3163, "step": 110225 }, { "epoch": 1.08, "grad_norm": 7.615623474121094, "learning_rate": 3.2724827586206896e-06, "loss": 0.1249, "step": 110250 }, { "epoch": 1.08, "grad_norm": 4.614964485168457, "learning_rate": 3.2720517241379315e-06, "loss": 0.3053, "step": 110275 }, { "epoch": 1.08, "grad_norm": 4.877101898193359, "learning_rate": 3.271620689655173e-06, "loss": 0.1552, "step": 110300 }, { "epoch": 1.09, "grad_norm": 5.1602559089660645, "learning_rate": 3.2711896551724144e-06, "loss": 0.2617, "step": 110325 }, { "epoch": 1.09, "grad_norm": 8.316916465759277, "learning_rate": 3.2707586206896554e-06, "loss": 0.1548, "step": 110350 }, { "epoch": 1.09, "grad_norm": 3.6188766956329346, "learning_rate": 3.270327586206897e-06, "loss": 0.3088, "step": 110375 }, { "epoch": 1.09, "grad_norm": 10.293791770935059, "learning_rate": 3.2698965517241383e-06, "loss": 0.1219, "step": 110400 }, { "epoch": 1.09, "grad_norm": 4.498456001281738, "learning_rate": 3.2694655172413798e-06, "loss": 0.2936, "step": 110425 }, { "epoch": 1.09, "grad_norm": 6.92275857925415, "learning_rate": 3.2690344827586212e-06, "loss": 0.1363, "step": 110450 }, { "epoch": 1.09, "grad_norm": 4.185989856719971, "learning_rate": 3.2686034482758623e-06, "loss": 0.3076, "step": 110475 }, { "epoch": 1.09, "grad_norm": 10.211430549621582, "learning_rate": 3.2681724137931037e-06, "loss": 0.1549, "step": 110500 }, { "epoch": 1.09, "grad_norm": 9.192177772521973, "learning_rate": 3.267741379310345e-06, "loss": 0.2769, "step": 110525 }, { "epoch": 1.09, "grad_norm": 12.293644905090332, "learning_rate": 3.2673103448275866e-06, "loss": 0.1533, "step": 110550 }, { "epoch": 1.09, "grad_norm": 4.840971946716309, "learning_rate": 3.2668793103448277e-06, "loss": 0.3262, "step": 110575 }, { "epoch": 1.09, "grad_norm": 9.9872407913208, "learning_rate": 3.266448275862069e-06, "loss": 0.1295, "step": 110600 }, { "epoch": 1.09, "grad_norm": 3.6501309871673584, "learning_rate": 3.2660172413793106e-06, "loss": 0.2872, "step": 110625 }, { "epoch": 1.09, "grad_norm": 6.530516624450684, "learning_rate": 3.265586206896552e-06, "loss": 0.1383, "step": 110650 }, { "epoch": 1.09, "grad_norm": 4.313230514526367, "learning_rate": 3.265155172413793e-06, "loss": 0.3468, "step": 110675 }, { "epoch": 1.09, "grad_norm": 9.032787322998047, "learning_rate": 3.2647241379310345e-06, "loss": 0.1788, "step": 110700 }, { "epoch": 1.09, "grad_norm": 5.0186543464660645, "learning_rate": 3.264293103448276e-06, "loss": 0.3131, "step": 110725 }, { "epoch": 1.09, "grad_norm": 7.148843288421631, "learning_rate": 3.2638620689655174e-06, "loss": 0.156, "step": 110750 }, { "epoch": 1.09, "grad_norm": 4.004832744598389, "learning_rate": 3.263431034482759e-06, "loss": 0.2809, "step": 110775 }, { "epoch": 1.09, "grad_norm": 8.775047302246094, "learning_rate": 3.263e-06, "loss": 0.1313, "step": 110800 }, { "epoch": 1.09, "grad_norm": 4.652998924255371, "learning_rate": 3.2625689655172414e-06, "loss": 0.3145, "step": 110825 }, { "epoch": 1.09, "grad_norm": 16.569055557250977, "learning_rate": 3.2621379310344833e-06, "loss": 0.1626, "step": 110850 }, { "epoch": 1.09, "grad_norm": 5.144525051116943, "learning_rate": 3.2617068965517247e-06, "loss": 0.293, "step": 110875 }, { "epoch": 1.09, "grad_norm": 9.23383903503418, "learning_rate": 3.2612758620689653e-06, "loss": 0.1241, "step": 110900 }, { "epoch": 1.09, "grad_norm": 3.6154215335845947, "learning_rate": 3.2608448275862072e-06, "loss": 0.3099, "step": 110925 }, { "epoch": 1.09, "grad_norm": 7.940160274505615, "learning_rate": 3.2604137931034487e-06, "loss": 0.1147, "step": 110950 }, { "epoch": 1.09, "grad_norm": 4.587257385253906, "learning_rate": 3.25998275862069e-06, "loss": 0.2851, "step": 110975 }, { "epoch": 1.09, "grad_norm": 18.651721954345703, "learning_rate": 3.2595517241379316e-06, "loss": 0.1845, "step": 111000 }, { "epoch": 1.09, "grad_norm": 5.2181243896484375, "learning_rate": 3.2591206896551726e-06, "loss": 0.3002, "step": 111025 }, { "epoch": 1.09, "grad_norm": 12.688179016113281, "learning_rate": 3.258689655172414e-06, "loss": 0.1943, "step": 111050 }, { "epoch": 1.09, "grad_norm": 4.081685543060303, "learning_rate": 3.2582586206896555e-06, "loss": 0.3247, "step": 111075 }, { "epoch": 1.09, "grad_norm": 15.218716621398926, "learning_rate": 3.257827586206897e-06, "loss": 0.157, "step": 111100 }, { "epoch": 1.09, "grad_norm": 6.680121898651123, "learning_rate": 3.257396551724138e-06, "loss": 0.3376, "step": 111125 }, { "epoch": 1.09, "grad_norm": 9.585458755493164, "learning_rate": 3.2569655172413795e-06, "loss": 0.1476, "step": 111150 }, { "epoch": 1.09, "grad_norm": 8.828465461730957, "learning_rate": 3.256534482758621e-06, "loss": 0.3217, "step": 111175 }, { "epoch": 1.09, "grad_norm": 10.876124382019043, "learning_rate": 3.2561034482758624e-06, "loss": 0.143, "step": 111200 }, { "epoch": 1.09, "grad_norm": 5.824877738952637, "learning_rate": 3.255672413793104e-06, "loss": 0.3077, "step": 111225 }, { "epoch": 1.09, "grad_norm": 2.6358723640441895, "learning_rate": 3.255241379310345e-06, "loss": 0.1362, "step": 111250 }, { "epoch": 1.09, "grad_norm": 3.6987757682800293, "learning_rate": 3.2548103448275863e-06, "loss": 0.2902, "step": 111275 }, { "epoch": 1.09, "grad_norm": 6.818912506103516, "learning_rate": 3.2543793103448278e-06, "loss": 0.1383, "step": 111300 }, { "epoch": 1.09, "grad_norm": 7.158359050750732, "learning_rate": 3.2539482758620692e-06, "loss": 0.3325, "step": 111325 }, { "epoch": 1.1, "grad_norm": 5.866434574127197, "learning_rate": 3.2535172413793103e-06, "loss": 0.1343, "step": 111350 }, { "epoch": 1.1, "grad_norm": 4.136610507965088, "learning_rate": 3.2530862068965517e-06, "loss": 0.2898, "step": 111375 }, { "epoch": 1.1, "grad_norm": 2.973923444747925, "learning_rate": 3.252655172413793e-06, "loss": 0.1272, "step": 111400 }, { "epoch": 1.1, "grad_norm": 4.044014930725098, "learning_rate": 3.252241379310345e-06, "loss": 0.2774, "step": 111425 }, { "epoch": 1.1, "grad_norm": 12.197905540466309, "learning_rate": 3.2518103448275865e-06, "loss": 0.1247, "step": 111450 }, { "epoch": 1.1, "grad_norm": 7.601804733276367, "learning_rate": 3.2513793103448276e-06, "loss": 0.3056, "step": 111475 }, { "epoch": 1.1, "grad_norm": 3.7716100215911865, "learning_rate": 3.250948275862069e-06, "loss": 0.1576, "step": 111500 }, { "epoch": 1.1, "grad_norm": 5.392796516418457, "learning_rate": 3.2505172413793105e-06, "loss": 0.2879, "step": 111525 }, { "epoch": 1.1, "grad_norm": 8.904813766479492, "learning_rate": 3.2500862068965524e-06, "loss": 0.1914, "step": 111550 }, { "epoch": 1.1, "grad_norm": 3.9306390285491943, "learning_rate": 3.249655172413793e-06, "loss": 0.2956, "step": 111575 }, { "epoch": 1.1, "grad_norm": 15.113388061523438, "learning_rate": 3.249224137931035e-06, "loss": 0.1829, "step": 111600 }, { "epoch": 1.1, "grad_norm": 5.571804523468018, "learning_rate": 3.2487931034482763e-06, "loss": 0.3214, "step": 111625 }, { "epoch": 1.1, "grad_norm": 11.304177284240723, "learning_rate": 3.2483620689655178e-06, "loss": 0.1208, "step": 111650 }, { "epoch": 1.1, "grad_norm": 3.3084981441497803, "learning_rate": 3.2479310344827592e-06, "loss": 0.312, "step": 111675 }, { "epoch": 1.1, "grad_norm": 4.672232627868652, "learning_rate": 3.2475000000000002e-06, "loss": 0.1448, "step": 111700 }, { "epoch": 1.1, "grad_norm": 5.0825419425964355, "learning_rate": 3.2470689655172417e-06, "loss": 0.2631, "step": 111725 }, { "epoch": 1.1, "grad_norm": 9.755027770996094, "learning_rate": 3.246637931034483e-06, "loss": 0.1544, "step": 111750 }, { "epoch": 1.1, "grad_norm": 4.165706634521484, "learning_rate": 3.2462068965517246e-06, "loss": 0.3087, "step": 111775 }, { "epoch": 1.1, "grad_norm": 11.99827766418457, "learning_rate": 3.2457758620689656e-06, "loss": 0.161, "step": 111800 }, { "epoch": 1.1, "grad_norm": 3.9522182941436768, "learning_rate": 3.245344827586207e-06, "loss": 0.2479, "step": 111825 }, { "epoch": 1.1, "grad_norm": 8.585763931274414, "learning_rate": 3.2449137931034486e-06, "loss": 0.1382, "step": 111850 }, { "epoch": 1.1, "grad_norm": 4.908032417297363, "learning_rate": 3.24448275862069e-06, "loss": 0.2787, "step": 111875 }, { "epoch": 1.1, "grad_norm": 14.014165878295898, "learning_rate": 3.2440517241379315e-06, "loss": 0.1568, "step": 111900 }, { "epoch": 1.1, "grad_norm": 4.119917869567871, "learning_rate": 3.2436206896551725e-06, "loss": 0.2743, "step": 111925 }, { "epoch": 1.1, "grad_norm": 8.623744010925293, "learning_rate": 3.243189655172414e-06, "loss": 0.1516, "step": 111950 }, { "epoch": 1.1, "grad_norm": 13.9291353225708, "learning_rate": 3.2427586206896554e-06, "loss": 0.2542, "step": 111975 }, { "epoch": 1.1, "grad_norm": 8.900325775146484, "learning_rate": 3.242327586206897e-06, "loss": 0.1528, "step": 112000 }, { "epoch": 1.1, "grad_norm": 6.134087085723877, "learning_rate": 3.241896551724138e-06, "loss": 0.405, "step": 112025 }, { "epoch": 1.1, "grad_norm": 10.056327819824219, "learning_rate": 3.2414655172413794e-06, "loss": 0.146, "step": 112050 }, { "epoch": 1.1, "grad_norm": 4.628635406494141, "learning_rate": 3.241034482758621e-06, "loss": 0.3146, "step": 112075 }, { "epoch": 1.1, "grad_norm": 7.026444911956787, "learning_rate": 3.2406034482758623e-06, "loss": 0.1314, "step": 112100 }, { "epoch": 1.1, "grad_norm": 5.042317867279053, "learning_rate": 3.240172413793104e-06, "loss": 0.2946, "step": 112125 }, { "epoch": 1.1, "grad_norm": 10.482976913452148, "learning_rate": 3.2397413793103448e-06, "loss": 0.1462, "step": 112150 }, { "epoch": 1.1, "grad_norm": 6.452094078063965, "learning_rate": 3.2393103448275866e-06, "loss": 0.3121, "step": 112175 }, { "epoch": 1.1, "grad_norm": 11.182969093322754, "learning_rate": 3.238879310344828e-06, "loss": 0.1597, "step": 112200 }, { "epoch": 1.1, "grad_norm": 3.223762035369873, "learning_rate": 3.2384482758620696e-06, "loss": 0.2965, "step": 112225 }, { "epoch": 1.1, "grad_norm": 8.941349029541016, "learning_rate": 3.2380172413793106e-06, "loss": 0.1401, "step": 112250 }, { "epoch": 1.1, "grad_norm": 7.645726203918457, "learning_rate": 3.237586206896552e-06, "loss": 0.2918, "step": 112275 }, { "epoch": 1.1, "grad_norm": 9.441131591796875, "learning_rate": 3.2371551724137935e-06, "loss": 0.1733, "step": 112300 }, { "epoch": 1.1, "grad_norm": 5.691770076751709, "learning_rate": 3.236724137931035e-06, "loss": 0.2633, "step": 112325 }, { "epoch": 1.11, "grad_norm": 7.4100446701049805, "learning_rate": 3.236293103448276e-06, "loss": 0.1312, "step": 112350 }, { "epoch": 1.11, "grad_norm": 3.5962347984313965, "learning_rate": 3.2358620689655175e-06, "loss": 0.2491, "step": 112375 }, { "epoch": 1.11, "grad_norm": 9.519753456115723, "learning_rate": 3.235431034482759e-06, "loss": 0.1455, "step": 112400 }, { "epoch": 1.11, "grad_norm": 5.48583984375, "learning_rate": 3.2350000000000004e-06, "loss": 0.327, "step": 112425 }, { "epoch": 1.11, "grad_norm": 5.923707485198975, "learning_rate": 3.234568965517242e-06, "loss": 0.1291, "step": 112450 }, { "epoch": 1.11, "grad_norm": 5.265143394470215, "learning_rate": 3.234137931034483e-06, "loss": 0.3156, "step": 112475 }, { "epoch": 1.11, "grad_norm": 8.6622314453125, "learning_rate": 3.2337068965517243e-06, "loss": 0.1336, "step": 112500 }, { "epoch": 1.11, "grad_norm": 4.915374755859375, "learning_rate": 3.2332758620689658e-06, "loss": 0.3161, "step": 112525 }, { "epoch": 1.11, "grad_norm": 8.37401008605957, "learning_rate": 3.2328448275862072e-06, "loss": 0.1254, "step": 112550 }, { "epoch": 1.11, "grad_norm": 4.157552719116211, "learning_rate": 3.2324137931034483e-06, "loss": 0.2262, "step": 112575 }, { "epoch": 1.11, "grad_norm": 7.995977401733398, "learning_rate": 3.2319827586206897e-06, "loss": 0.1052, "step": 112600 }, { "epoch": 1.11, "grad_norm": 4.598016738891602, "learning_rate": 3.231551724137931e-06, "loss": 0.3565, "step": 112625 }, { "epoch": 1.11, "grad_norm": 5.498622894287109, "learning_rate": 3.2311206896551726e-06, "loss": 0.1289, "step": 112650 }, { "epoch": 1.11, "grad_norm": 4.114995956420898, "learning_rate": 3.230689655172414e-06, "loss": 0.2467, "step": 112675 }, { "epoch": 1.11, "grad_norm": 7.0909953117370605, "learning_rate": 3.230258620689655e-06, "loss": 0.1464, "step": 112700 }, { "epoch": 1.11, "grad_norm": 4.24362325668335, "learning_rate": 3.2298275862068966e-06, "loss": 0.2763, "step": 112725 }, { "epoch": 1.11, "grad_norm": 6.475879192352295, "learning_rate": 3.229396551724138e-06, "loss": 0.1283, "step": 112750 }, { "epoch": 1.11, "grad_norm": 5.307402610778809, "learning_rate": 3.22896551724138e-06, "loss": 0.2694, "step": 112775 }, { "epoch": 1.11, "grad_norm": 5.43255615234375, "learning_rate": 3.2285344827586205e-06, "loss": 0.1243, "step": 112800 }, { "epoch": 1.11, "grad_norm": 6.083964824676514, "learning_rate": 3.2281034482758624e-06, "loss": 0.2993, "step": 112825 }, { "epoch": 1.11, "grad_norm": 38.7633056640625, "learning_rate": 3.227672413793104e-06, "loss": 0.1659, "step": 112850 }, { "epoch": 1.11, "grad_norm": 10.968369483947754, "learning_rate": 3.2272413793103453e-06, "loss": 0.2874, "step": 112875 }, { "epoch": 1.11, "grad_norm": 10.164978981018066, "learning_rate": 3.2268103448275868e-06, "loss": 0.1467, "step": 112900 }, { "epoch": 1.11, "grad_norm": 4.536623001098633, "learning_rate": 3.226379310344828e-06, "loss": 0.263, "step": 112925 }, { "epoch": 1.11, "grad_norm": 7.5011186599731445, "learning_rate": 3.2259482758620693e-06, "loss": 0.1547, "step": 112950 }, { "epoch": 1.11, "grad_norm": 5.413849830627441, "learning_rate": 3.2255172413793107e-06, "loss": 0.305, "step": 112975 }, { "epoch": 1.11, "grad_norm": 0.17356812953948975, "learning_rate": 3.225086206896552e-06, "loss": 0.1439, "step": 113000 }, { "epoch": 1.11, "grad_norm": 8.519996643066406, "learning_rate": 3.224655172413793e-06, "loss": 0.3516, "step": 113025 }, { "epoch": 1.11, "grad_norm": 7.799322128295898, "learning_rate": 3.2242241379310347e-06, "loss": 0.1379, "step": 113050 }, { "epoch": 1.11, "grad_norm": 23.894636154174805, "learning_rate": 3.223793103448276e-06, "loss": 0.3125, "step": 113075 }, { "epoch": 1.11, "grad_norm": 4.438114643096924, "learning_rate": 3.2233620689655176e-06, "loss": 0.1529, "step": 113100 }, { "epoch": 1.11, "grad_norm": 4.864850997924805, "learning_rate": 3.222931034482759e-06, "loss": 0.2689, "step": 113125 }, { "epoch": 1.11, "grad_norm": 5.811806678771973, "learning_rate": 3.2225e-06, "loss": 0.1048, "step": 113150 }, { "epoch": 1.11, "grad_norm": 5.379547119140625, "learning_rate": 3.2220689655172415e-06, "loss": 0.2812, "step": 113175 }, { "epoch": 1.11, "grad_norm": 7.258357048034668, "learning_rate": 3.221637931034483e-06, "loss": 0.1591, "step": 113200 }, { "epoch": 1.11, "grad_norm": 4.6104512214660645, "learning_rate": 3.2212068965517244e-06, "loss": 0.2422, "step": 113225 }, { "epoch": 1.11, "grad_norm": 5.855809211730957, "learning_rate": 3.2207758620689655e-06, "loss": 0.1624, "step": 113250 }, { "epoch": 1.11, "grad_norm": 5.30782413482666, "learning_rate": 3.220344827586207e-06, "loss": 0.292, "step": 113275 }, { "epoch": 1.11, "grad_norm": 11.909806251525879, "learning_rate": 3.2199137931034484e-06, "loss": 0.1502, "step": 113300 }, { "epoch": 1.11, "grad_norm": 4.420931816101074, "learning_rate": 3.21948275862069e-06, "loss": 0.3008, "step": 113325 }, { "epoch": 1.11, "grad_norm": 5.128617763519287, "learning_rate": 3.2190517241379317e-06, "loss": 0.1471, "step": 113350 }, { "epoch": 1.12, "grad_norm": 3.93354868888855, "learning_rate": 3.2186206896551723e-06, "loss": 0.2643, "step": 113375 }, { "epoch": 1.12, "grad_norm": 7.480708599090576, "learning_rate": 3.218189655172414e-06, "loss": 0.1262, "step": 113400 }, { "epoch": 1.12, "grad_norm": 5.305004119873047, "learning_rate": 3.2177586206896557e-06, "loss": 0.277, "step": 113425 }, { "epoch": 1.12, "grad_norm": 2.049478054046631, "learning_rate": 3.217327586206897e-06, "loss": 0.1708, "step": 113450 }, { "epoch": 1.12, "grad_norm": 4.352942943572998, "learning_rate": 3.216896551724138e-06, "loss": 0.3483, "step": 113475 }, { "epoch": 1.12, "grad_norm": 6.818527698516846, "learning_rate": 3.2164655172413796e-06, "loss": 0.1715, "step": 113500 }, { "epoch": 1.12, "grad_norm": 3.5498545169830322, "learning_rate": 3.216034482758621e-06, "loss": 0.3022, "step": 113525 }, { "epoch": 1.12, "grad_norm": 5.974918842315674, "learning_rate": 3.2156034482758625e-06, "loss": 0.1714, "step": 113550 }, { "epoch": 1.12, "grad_norm": 4.327347278594971, "learning_rate": 3.215172413793104e-06, "loss": 0.3324, "step": 113575 }, { "epoch": 1.12, "grad_norm": 5.95267915725708, "learning_rate": 3.214741379310345e-06, "loss": 0.1435, "step": 113600 }, { "epoch": 1.12, "grad_norm": 3.646831512451172, "learning_rate": 3.2143103448275865e-06, "loss": 0.2462, "step": 113625 }, { "epoch": 1.12, "grad_norm": 9.969099998474121, "learning_rate": 3.213879310344828e-06, "loss": 0.1425, "step": 113650 }, { "epoch": 1.12, "grad_norm": 6.966679573059082, "learning_rate": 3.2134482758620694e-06, "loss": 0.2532, "step": 113675 }, { "epoch": 1.12, "grad_norm": 13.272634506225586, "learning_rate": 3.2130172413793104e-06, "loss": 0.1268, "step": 113700 }, { "epoch": 1.12, "grad_norm": 6.5233073234558105, "learning_rate": 3.212586206896552e-06, "loss": 0.2825, "step": 113725 }, { "epoch": 1.12, "grad_norm": 3.2669260501861572, "learning_rate": 3.2121551724137933e-06, "loss": 0.0987, "step": 113750 }, { "epoch": 1.12, "grad_norm": 3.665022850036621, "learning_rate": 3.2117241379310348e-06, "loss": 0.2875, "step": 113775 }, { "epoch": 1.12, "grad_norm": 5.234879493713379, "learning_rate": 3.2112931034482762e-06, "loss": 0.1619, "step": 113800 }, { "epoch": 1.12, "grad_norm": 5.069450855255127, "learning_rate": 3.2108620689655173e-06, "loss": 0.2892, "step": 113825 }, { "epoch": 1.12, "grad_norm": 10.223833084106445, "learning_rate": 3.2104310344827587e-06, "loss": 0.1246, "step": 113850 }, { "epoch": 1.12, "grad_norm": 5.0338640213012695, "learning_rate": 3.21e-06, "loss": 0.2885, "step": 113875 }, { "epoch": 1.12, "grad_norm": 13.576749801635742, "learning_rate": 3.2095689655172416e-06, "loss": 0.1601, "step": 113900 }, { "epoch": 1.12, "grad_norm": 8.419998168945312, "learning_rate": 3.2091379310344827e-06, "loss": 0.2582, "step": 113925 }, { "epoch": 1.12, "grad_norm": 7.8250813484191895, "learning_rate": 3.208706896551724e-06, "loss": 0.1385, "step": 113950 }, { "epoch": 1.12, "grad_norm": 4.78797721862793, "learning_rate": 3.2082758620689656e-06, "loss": 0.2367, "step": 113975 }, { "epoch": 1.12, "grad_norm": 10.457942962646484, "learning_rate": 3.2078448275862075e-06, "loss": 0.1239, "step": 114000 }, { "epoch": 1.12, "grad_norm": 22.057147979736328, "learning_rate": 3.207413793103449e-06, "loss": 0.2275, "step": 114025 }, { "epoch": 1.12, "grad_norm": 8.689535140991211, "learning_rate": 3.20698275862069e-06, "loss": 0.158, "step": 114050 }, { "epoch": 1.12, "grad_norm": 3.6158759593963623, "learning_rate": 3.2065517241379314e-06, "loss": 0.382, "step": 114075 }, { "epoch": 1.12, "grad_norm": 5.4090728759765625, "learning_rate": 3.206120689655173e-06, "loss": 0.143, "step": 114100 }, { "epoch": 1.12, "grad_norm": 5.374869346618652, "learning_rate": 3.2056896551724143e-06, "loss": 0.292, "step": 114125 }, { "epoch": 1.12, "grad_norm": 9.310783386230469, "learning_rate": 3.2052586206896554e-06, "loss": 0.1566, "step": 114150 }, { "epoch": 1.12, "grad_norm": 5.330989360809326, "learning_rate": 3.204827586206897e-06, "loss": 0.2662, "step": 114175 }, { "epoch": 1.12, "grad_norm": 5.478514671325684, "learning_rate": 3.2043965517241383e-06, "loss": 0.1198, "step": 114200 }, { "epoch": 1.12, "grad_norm": 3.1292524337768555, "learning_rate": 3.2039655172413797e-06, "loss": 0.3215, "step": 114225 }, { "epoch": 1.12, "grad_norm": 9.65989875793457, "learning_rate": 3.203534482758621e-06, "loss": 0.1865, "step": 114250 }, { "epoch": 1.12, "grad_norm": 5.4728474617004395, "learning_rate": 3.2031034482758622e-06, "loss": 0.3236, "step": 114275 }, { "epoch": 1.12, "grad_norm": 10.24694538116455, "learning_rate": 3.2026724137931037e-06, "loss": 0.155, "step": 114300 }, { "epoch": 1.12, "grad_norm": 5.062960147857666, "learning_rate": 3.202241379310345e-06, "loss": 0.2865, "step": 114325 }, { "epoch": 1.12, "grad_norm": 13.903542518615723, "learning_rate": 3.2018103448275866e-06, "loss": 0.1844, "step": 114350 }, { "epoch": 1.12, "grad_norm": 4.385568141937256, "learning_rate": 3.2013793103448276e-06, "loss": 0.2887, "step": 114375 }, { "epoch": 1.13, "grad_norm": 5.9742326736450195, "learning_rate": 3.200948275862069e-06, "loss": 0.1358, "step": 114400 }, { "epoch": 1.13, "grad_norm": 4.239022731781006, "learning_rate": 3.2005172413793105e-06, "loss": 0.336, "step": 114425 }, { "epoch": 1.13, "grad_norm": 2.054044485092163, "learning_rate": 3.200086206896552e-06, "loss": 0.1625, "step": 114450 }, { "epoch": 1.13, "grad_norm": 10.227458000183105, "learning_rate": 3.1996551724137934e-06, "loss": 0.2851, "step": 114475 }, { "epoch": 1.13, "grad_norm": 13.63179874420166, "learning_rate": 3.1992241379310345e-06, "loss": 0.175, "step": 114500 }, { "epoch": 1.13, "grad_norm": 4.952764987945557, "learning_rate": 3.198793103448276e-06, "loss": 0.3293, "step": 114525 }, { "epoch": 1.13, "grad_norm": 8.370049476623535, "learning_rate": 3.1983620689655174e-06, "loss": 0.1369, "step": 114550 }, { "epoch": 1.13, "grad_norm": 3.9606878757476807, "learning_rate": 3.1979310344827593e-06, "loss": 0.2704, "step": 114575 }, { "epoch": 1.13, "grad_norm": 14.222318649291992, "learning_rate": 3.1975e-06, "loss": 0.1338, "step": 114600 }, { "epoch": 1.13, "grad_norm": 3.828732490539551, "learning_rate": 3.1970689655172418e-06, "loss": 0.2677, "step": 114625 }, { "epoch": 1.13, "grad_norm": 3.1791608333587646, "learning_rate": 3.1966379310344832e-06, "loss": 0.1137, "step": 114650 }, { "epoch": 1.13, "grad_norm": 4.465324401855469, "learning_rate": 3.1962068965517247e-06, "loss": 0.307, "step": 114675 }, { "epoch": 1.13, "grad_norm": 10.353102684020996, "learning_rate": 3.1957758620689657e-06, "loss": 0.1988, "step": 114700 }, { "epoch": 1.13, "grad_norm": 8.140467643737793, "learning_rate": 3.195344827586207e-06, "loss": 0.3855, "step": 114725 }, { "epoch": 1.13, "grad_norm": 10.016942024230957, "learning_rate": 3.1949137931034486e-06, "loss": 0.1436, "step": 114750 }, { "epoch": 1.13, "grad_norm": 8.275634765625, "learning_rate": 3.19448275862069e-06, "loss": 0.2606, "step": 114775 }, { "epoch": 1.13, "grad_norm": 8.76644229888916, "learning_rate": 3.1940517241379315e-06, "loss": 0.1454, "step": 114800 }, { "epoch": 1.13, "grad_norm": 4.938336372375488, "learning_rate": 3.1936206896551726e-06, "loss": 0.3073, "step": 114825 }, { "epoch": 1.13, "grad_norm": 10.80545425415039, "learning_rate": 3.193189655172414e-06, "loss": 0.1831, "step": 114850 }, { "epoch": 1.13, "grad_norm": 4.745724201202393, "learning_rate": 3.1927586206896555e-06, "loss": 0.2478, "step": 114875 }, { "epoch": 1.13, "grad_norm": 9.770601272583008, "learning_rate": 3.192327586206897e-06, "loss": 0.1408, "step": 114900 }, { "epoch": 1.13, "grad_norm": 4.8589630126953125, "learning_rate": 3.191896551724138e-06, "loss": 0.315, "step": 114925 }, { "epoch": 1.13, "grad_norm": 5.417369842529297, "learning_rate": 3.1914655172413794e-06, "loss": 0.1088, "step": 114950 }, { "epoch": 1.13, "grad_norm": 4.063895225524902, "learning_rate": 3.191034482758621e-06, "loss": 0.2918, "step": 114975 }, { "epoch": 1.13, "grad_norm": 4.272153854370117, "learning_rate": 3.1906034482758623e-06, "loss": 0.1393, "step": 115000 }, { "epoch": 1.13, "grad_norm": 4.460615158081055, "learning_rate": 3.190172413793104e-06, "loss": 0.264, "step": 115025 }, { "epoch": 1.13, "grad_norm": 10.117389678955078, "learning_rate": 3.189741379310345e-06, "loss": 0.1488, "step": 115050 }, { "epoch": 1.13, "grad_norm": 5.46649169921875, "learning_rate": 3.1893103448275863e-06, "loss": 0.3471, "step": 115075 }, { "epoch": 1.13, "grad_norm": 8.18510627746582, "learning_rate": 3.1888793103448277e-06, "loss": 0.1258, "step": 115100 }, { "epoch": 1.13, "grad_norm": 3.3308472633361816, "learning_rate": 3.188448275862069e-06, "loss": 0.2554, "step": 115125 }, { "epoch": 1.13, "grad_norm": 12.85835075378418, "learning_rate": 3.1880172413793102e-06, "loss": 0.2068, "step": 115150 }, { "epoch": 1.13, "grad_norm": 4.6229424476623535, "learning_rate": 3.1875862068965517e-06, "loss": 0.2501, "step": 115175 }, { "epoch": 1.13, "grad_norm": 5.827660083770752, "learning_rate": 3.1871551724137936e-06, "loss": 0.1115, "step": 115200 }, { "epoch": 1.13, "grad_norm": 4.176805019378662, "learning_rate": 3.186724137931035e-06, "loss": 0.2663, "step": 115225 }, { "epoch": 1.13, "grad_norm": 6.927261829376221, "learning_rate": 3.1862931034482765e-06, "loss": 0.163, "step": 115250 }, { "epoch": 1.13, "grad_norm": 6.684842586517334, "learning_rate": 3.1858620689655175e-06, "loss": 0.3288, "step": 115275 }, { "epoch": 1.13, "grad_norm": 7.335128307342529, "learning_rate": 3.185431034482759e-06, "loss": 0.113, "step": 115300 }, { "epoch": 1.13, "grad_norm": 3.3061013221740723, "learning_rate": 3.1850000000000004e-06, "loss": 0.2589, "step": 115325 }, { "epoch": 1.13, "grad_norm": 8.559904098510742, "learning_rate": 3.184568965517242e-06, "loss": 0.1399, "step": 115350 }, { "epoch": 1.13, "grad_norm": 4.922614574432373, "learning_rate": 3.184137931034483e-06, "loss": 0.2171, "step": 115375 }, { "epoch": 1.14, "grad_norm": 7.386072635650635, "learning_rate": 3.1837068965517244e-06, "loss": 0.1151, "step": 115400 }, { "epoch": 1.14, "grad_norm": 4.229808807373047, "learning_rate": 3.183275862068966e-06, "loss": 0.3424, "step": 115425 }, { "epoch": 1.14, "grad_norm": 10.02163028717041, "learning_rate": 3.1828448275862073e-06, "loss": 0.1461, "step": 115450 }, { "epoch": 1.14, "grad_norm": 4.627892017364502, "learning_rate": 3.182431034482759e-06, "loss": 0.3017, "step": 115475 }, { "epoch": 1.14, "grad_norm": 4.855826377868652, "learning_rate": 3.182e-06, "loss": 0.14, "step": 115500 }, { "epoch": 1.14, "grad_norm": 5.672911167144775, "learning_rate": 3.1815689655172417e-06, "loss": 0.2713, "step": 115525 }, { "epoch": 1.14, "grad_norm": 7.918604373931885, "learning_rate": 3.181137931034483e-06, "loss": 0.1406, "step": 115550 }, { "epoch": 1.14, "grad_norm": 3.103107213973999, "learning_rate": 3.1807068965517246e-06, "loss": 0.3228, "step": 115575 }, { "epoch": 1.14, "grad_norm": 7.281835556030273, "learning_rate": 3.1802758620689656e-06, "loss": 0.1465, "step": 115600 }, { "epoch": 1.14, "grad_norm": 4.751455307006836, "learning_rate": 3.179844827586207e-06, "loss": 0.2921, "step": 115625 }, { "epoch": 1.14, "grad_norm": 13.424504280090332, "learning_rate": 3.1794137931034485e-06, "loss": 0.1953, "step": 115650 }, { "epoch": 1.14, "grad_norm": 3.8359873294830322, "learning_rate": 3.17898275862069e-06, "loss": 0.282, "step": 115675 }, { "epoch": 1.14, "grad_norm": 4.758774757385254, "learning_rate": 3.1785517241379314e-06, "loss": 0.155, "step": 115700 }, { "epoch": 1.14, "grad_norm": 7.137385845184326, "learning_rate": 3.1781206896551725e-06, "loss": 0.2832, "step": 115725 }, { "epoch": 1.14, "grad_norm": 11.882250785827637, "learning_rate": 3.177689655172414e-06, "loss": 0.1647, "step": 115750 }, { "epoch": 1.14, "grad_norm": 4.418809413909912, "learning_rate": 3.1772586206896554e-06, "loss": 0.2303, "step": 115775 }, { "epoch": 1.14, "grad_norm": 7.527054309844971, "learning_rate": 3.176827586206897e-06, "loss": 0.1361, "step": 115800 }, { "epoch": 1.14, "grad_norm": 6.114880084991455, "learning_rate": 3.176396551724138e-06, "loss": 0.3004, "step": 115825 }, { "epoch": 1.14, "grad_norm": 6.286126613616943, "learning_rate": 3.1759655172413793e-06, "loss": 0.1607, "step": 115850 }, { "epoch": 1.14, "grad_norm": 4.249058723449707, "learning_rate": 3.1755344827586208e-06, "loss": 0.2457, "step": 115875 }, { "epoch": 1.14, "grad_norm": 9.751641273498535, "learning_rate": 3.1751034482758627e-06, "loss": 0.1985, "step": 115900 }, { "epoch": 1.14, "grad_norm": 4.486483097076416, "learning_rate": 3.174672413793104e-06, "loss": 0.3439, "step": 115925 }, { "epoch": 1.14, "grad_norm": 5.886117458343506, "learning_rate": 3.1742413793103447e-06, "loss": 0.1395, "step": 115950 }, { "epoch": 1.14, "grad_norm": 4.666757583618164, "learning_rate": 3.1738103448275866e-06, "loss": 0.3277, "step": 115975 }, { "epoch": 1.14, "grad_norm": 5.336255073547363, "learning_rate": 3.173379310344828e-06, "loss": 0.151, "step": 116000 }, { "epoch": 1.14, "grad_norm": 4.810086250305176, "learning_rate": 3.1729482758620695e-06, "loss": 0.3013, "step": 116025 }, { "epoch": 1.14, "grad_norm": 11.739815711975098, "learning_rate": 3.1725172413793105e-06, "loss": 0.1617, "step": 116050 }, { "epoch": 1.14, "grad_norm": 4.277310371398926, "learning_rate": 3.172086206896552e-06, "loss": 0.2863, "step": 116075 }, { "epoch": 1.14, "grad_norm": 12.792197227478027, "learning_rate": 3.1716551724137935e-06, "loss": 0.1444, "step": 116100 }, { "epoch": 1.14, "grad_norm": 3.4954936504364014, "learning_rate": 3.171224137931035e-06, "loss": 0.2147, "step": 116125 }, { "epoch": 1.14, "grad_norm": 10.170063972473145, "learning_rate": 3.170793103448276e-06, "loss": 0.128, "step": 116150 }, { "epoch": 1.14, "grad_norm": 4.343980312347412, "learning_rate": 3.1703620689655174e-06, "loss": 0.225, "step": 116175 }, { "epoch": 1.14, "grad_norm": 7.874047756195068, "learning_rate": 3.169931034482759e-06, "loss": 0.1539, "step": 116200 }, { "epoch": 1.14, "grad_norm": 4.582138538360596, "learning_rate": 3.1695000000000003e-06, "loss": 0.3151, "step": 116225 }, { "epoch": 1.14, "grad_norm": 5.656986713409424, "learning_rate": 3.1690689655172418e-06, "loss": 0.1365, "step": 116250 }, { "epoch": 1.14, "grad_norm": 4.842398166656494, "learning_rate": 3.168637931034483e-06, "loss": 0.2853, "step": 116275 }, { "epoch": 1.14, "grad_norm": 7.548626899719238, "learning_rate": 3.1682068965517243e-06, "loss": 0.1233, "step": 116300 }, { "epoch": 1.14, "grad_norm": 4.3514180183410645, "learning_rate": 3.1677758620689657e-06, "loss": 0.2258, "step": 116325 }, { "epoch": 1.14, "grad_norm": 7.886683940887451, "learning_rate": 3.167344827586207e-06, "loss": 0.1398, "step": 116350 }, { "epoch": 1.14, "grad_norm": 5.103548049926758, "learning_rate": 3.166913793103448e-06, "loss": 0.318, "step": 116375 }, { "epoch": 1.14, "grad_norm": 8.912496566772461, "learning_rate": 3.1664827586206897e-06, "loss": 0.132, "step": 116400 }, { "epoch": 1.15, "grad_norm": 4.62424898147583, "learning_rate": 3.166051724137931e-06, "loss": 0.3218, "step": 116425 }, { "epoch": 1.15, "grad_norm": 8.582551002502441, "learning_rate": 3.1656206896551726e-06, "loss": 0.1328, "step": 116450 }, { "epoch": 1.15, "grad_norm": 5.307713985443115, "learning_rate": 3.1651896551724145e-06, "loss": 0.2521, "step": 116475 }, { "epoch": 1.15, "grad_norm": 5.058102607727051, "learning_rate": 3.164758620689655e-06, "loss": 0.1194, "step": 116500 }, { "epoch": 1.15, "grad_norm": 7.216590404510498, "learning_rate": 3.1643275862068965e-06, "loss": 0.3176, "step": 116525 }, { "epoch": 1.15, "grad_norm": 7.491619110107422, "learning_rate": 3.1638965517241384e-06, "loss": 0.1379, "step": 116550 }, { "epoch": 1.15, "grad_norm": 3.220747470855713, "learning_rate": 3.16346551724138e-06, "loss": 0.3128, "step": 116575 }, { "epoch": 1.15, "grad_norm": 5.580753326416016, "learning_rate": 3.163034482758621e-06, "loss": 0.1633, "step": 116600 }, { "epoch": 1.15, "grad_norm": 5.108368873596191, "learning_rate": 3.1626034482758623e-06, "loss": 0.2543, "step": 116625 }, { "epoch": 1.15, "grad_norm": 7.4405694007873535, "learning_rate": 3.162172413793104e-06, "loss": 0.1314, "step": 116650 }, { "epoch": 1.15, "grad_norm": 4.267054557800293, "learning_rate": 3.1617413793103453e-06, "loss": 0.2839, "step": 116675 }, { "epoch": 1.15, "grad_norm": 10.434131622314453, "learning_rate": 3.1613103448275867e-06, "loss": 0.1524, "step": 116700 }, { "epoch": 1.15, "grad_norm": 4.076469898223877, "learning_rate": 3.1608793103448278e-06, "loss": 0.3003, "step": 116725 }, { "epoch": 1.15, "grad_norm": 8.880414009094238, "learning_rate": 3.160448275862069e-06, "loss": 0.1184, "step": 116750 }, { "epoch": 1.15, "grad_norm": 6.553768634796143, "learning_rate": 3.1600172413793107e-06, "loss": 0.3193, "step": 116775 }, { "epoch": 1.15, "grad_norm": 13.744793891906738, "learning_rate": 3.159586206896552e-06, "loss": 0.1394, "step": 116800 }, { "epoch": 1.15, "grad_norm": 7.2299723625183105, "learning_rate": 3.159155172413793e-06, "loss": 0.3304, "step": 116825 }, { "epoch": 1.15, "grad_norm": 14.252623558044434, "learning_rate": 3.1587241379310346e-06, "loss": 0.1423, "step": 116850 }, { "epoch": 1.15, "grad_norm": 5.605360984802246, "learning_rate": 3.158293103448276e-06, "loss": 0.2479, "step": 116875 }, { "epoch": 1.15, "grad_norm": 9.05370807647705, "learning_rate": 3.1578620689655175e-06, "loss": 0.1522, "step": 116900 }, { "epoch": 1.15, "grad_norm": 3.993725299835205, "learning_rate": 3.157431034482759e-06, "loss": 0.3184, "step": 116925 }, { "epoch": 1.15, "grad_norm": 12.382701873779297, "learning_rate": 3.157e-06, "loss": 0.1636, "step": 116950 }, { "epoch": 1.15, "grad_norm": 4.160519123077393, "learning_rate": 3.1565689655172415e-06, "loss": 0.3217, "step": 116975 }, { "epoch": 1.15, "grad_norm": 10.560558319091797, "learning_rate": 3.156137931034483e-06, "loss": 0.1213, "step": 117000 }, { "epoch": 1.15, "grad_norm": 4.492298126220703, "learning_rate": 3.1557068965517244e-06, "loss": 0.2896, "step": 117025 }, { "epoch": 1.15, "grad_norm": 10.3744535446167, "learning_rate": 3.1552758620689654e-06, "loss": 0.1565, "step": 117050 }, { "epoch": 1.15, "grad_norm": 4.229739189147949, "learning_rate": 3.154844827586207e-06, "loss": 0.2718, "step": 117075 }, { "epoch": 1.15, "grad_norm": 5.968569755554199, "learning_rate": 3.1544137931034483e-06, "loss": 0.1473, "step": 117100 }, { "epoch": 1.15, "grad_norm": 4.4023051261901855, "learning_rate": 3.15398275862069e-06, "loss": 0.2784, "step": 117125 }, { "epoch": 1.15, "grad_norm": 5.5644917488098145, "learning_rate": 3.1535517241379317e-06, "loss": 0.1555, "step": 117150 }, { "epoch": 1.15, "grad_norm": 4.052086353302002, "learning_rate": 3.1531206896551723e-06, "loss": 0.2879, "step": 117175 }, { "epoch": 1.15, "grad_norm": 5.724864482879639, "learning_rate": 3.152689655172414e-06, "loss": 0.1915, "step": 117200 }, { "epoch": 1.15, "grad_norm": 4.426027774810791, "learning_rate": 3.1522586206896556e-06, "loss": 0.2506, "step": 117225 }, { "epoch": 1.15, "grad_norm": 11.227113723754883, "learning_rate": 3.151827586206897e-06, "loss": 0.1483, "step": 117250 }, { "epoch": 1.15, "grad_norm": 3.6791138648986816, "learning_rate": 3.151396551724138e-06, "loss": 0.2855, "step": 117275 }, { "epoch": 1.15, "grad_norm": 6.664927005767822, "learning_rate": 3.1509655172413796e-06, "loss": 0.1229, "step": 117300 }, { "epoch": 1.15, "grad_norm": 5.96844482421875, "learning_rate": 3.150534482758621e-06, "loss": 0.2499, "step": 117325 }, { "epoch": 1.15, "grad_norm": 7.334805965423584, "learning_rate": 3.1501034482758625e-06, "loss": 0.1356, "step": 117350 }, { "epoch": 1.15, "grad_norm": 4.07557487487793, "learning_rate": 3.149672413793104e-06, "loss": 0.3212, "step": 117375 }, { "epoch": 1.15, "grad_norm": 10.246469497680664, "learning_rate": 3.149241379310345e-06, "loss": 0.1507, "step": 117400 }, { "epoch": 1.15, "grad_norm": 4.747322082519531, "learning_rate": 3.1488103448275864e-06, "loss": 0.3258, "step": 117425 }, { "epoch": 1.16, "grad_norm": 11.715132713317871, "learning_rate": 3.148379310344828e-06, "loss": 0.1437, "step": 117450 }, { "epoch": 1.16, "grad_norm": 3.318629264831543, "learning_rate": 3.1479482758620693e-06, "loss": 0.2344, "step": 117475 }, { "epoch": 1.16, "grad_norm": 4.427136421203613, "learning_rate": 3.1475172413793104e-06, "loss": 0.1175, "step": 117500 }, { "epoch": 1.16, "grad_norm": 3.4755380153656006, "learning_rate": 3.147086206896552e-06, "loss": 0.2576, "step": 117525 }, { "epoch": 1.16, "grad_norm": 4.412223815917969, "learning_rate": 3.1466551724137933e-06, "loss": 0.1273, "step": 117550 }, { "epoch": 1.16, "grad_norm": 5.012177467346191, "learning_rate": 3.1462241379310347e-06, "loss": 0.2697, "step": 117575 }, { "epoch": 1.16, "grad_norm": 9.903834342956543, "learning_rate": 3.145793103448276e-06, "loss": 0.142, "step": 117600 }, { "epoch": 1.16, "grad_norm": 4.61440896987915, "learning_rate": 3.1453793103448276e-06, "loss": 0.3443, "step": 117625 }, { "epoch": 1.16, "grad_norm": 5.229649066925049, "learning_rate": 3.144948275862069e-06, "loss": 0.1558, "step": 117650 }, { "epoch": 1.16, "grad_norm": 5.906344890594482, "learning_rate": 3.1445172413793106e-06, "loss": 0.3661, "step": 117675 }, { "epoch": 1.16, "grad_norm": 5.2668657302856445, "learning_rate": 3.144086206896552e-06, "loss": 0.1444, "step": 117700 }, { "epoch": 1.16, "grad_norm": 5.417515277862549, "learning_rate": 3.143655172413793e-06, "loss": 0.3099, "step": 117725 }, { "epoch": 1.16, "grad_norm": 8.143030166625977, "learning_rate": 3.1432241379310345e-06, "loss": 0.1586, "step": 117750 }, { "epoch": 1.16, "grad_norm": 6.457624435424805, "learning_rate": 3.142793103448276e-06, "loss": 0.3123, "step": 117775 }, { "epoch": 1.16, "grad_norm": 9.938380241394043, "learning_rate": 3.142362068965518e-06, "loss": 0.1728, "step": 117800 }, { "epoch": 1.16, "grad_norm": 5.203339099884033, "learning_rate": 3.1419310344827585e-06, "loss": 0.3337, "step": 117825 }, { "epoch": 1.16, "grad_norm": 7.890429496765137, "learning_rate": 3.1415e-06, "loss": 0.1521, "step": 117850 }, { "epoch": 1.16, "grad_norm": 18.839391708374023, "learning_rate": 3.1410689655172418e-06, "loss": 0.3115, "step": 117875 }, { "epoch": 1.16, "grad_norm": 7.371755123138428, "learning_rate": 3.1406379310344832e-06, "loss": 0.1459, "step": 117900 }, { "epoch": 1.16, "grad_norm": 3.158653497695923, "learning_rate": 3.1402068965517247e-06, "loss": 0.293, "step": 117925 }, { "epoch": 1.16, "grad_norm": 11.361528396606445, "learning_rate": 3.1397758620689657e-06, "loss": 0.1382, "step": 117950 }, { "epoch": 1.16, "grad_norm": 4.711281776428223, "learning_rate": 3.139344827586207e-06, "loss": 0.2901, "step": 117975 }, { "epoch": 1.16, "grad_norm": 6.663392543792725, "learning_rate": 3.1389137931034486e-06, "loss": 0.1226, "step": 118000 }, { "epoch": 1.16, "grad_norm": 3.7185840606689453, "learning_rate": 3.13848275862069e-06, "loss": 0.2818, "step": 118025 }, { "epoch": 1.16, "grad_norm": 7.931743621826172, "learning_rate": 3.138051724137931e-06, "loss": 0.1503, "step": 118050 }, { "epoch": 1.16, "grad_norm": 4.646505832672119, "learning_rate": 3.1376206896551726e-06, "loss": 0.2725, "step": 118075 }, { "epoch": 1.16, "grad_norm": 11.102888107299805, "learning_rate": 3.137189655172414e-06, "loss": 0.1815, "step": 118100 }, { "epoch": 1.16, "grad_norm": 4.320565223693848, "learning_rate": 3.1367586206896555e-06, "loss": 0.2752, "step": 118125 }, { "epoch": 1.16, "grad_norm": 11.126492500305176, "learning_rate": 3.136327586206897e-06, "loss": 0.1304, "step": 118150 }, { "epoch": 1.16, "grad_norm": 4.226693153381348, "learning_rate": 3.135896551724138e-06, "loss": 0.2858, "step": 118175 }, { "epoch": 1.16, "grad_norm": 7.813387870788574, "learning_rate": 3.1354655172413795e-06, "loss": 0.1558, "step": 118200 }, { "epoch": 1.16, "grad_norm": 4.274628639221191, "learning_rate": 3.135034482758621e-06, "loss": 0.2428, "step": 118225 }, { "epoch": 1.16, "grad_norm": 3.1917552947998047, "learning_rate": 3.1346034482758624e-06, "loss": 0.1154, "step": 118250 }, { "epoch": 1.16, "grad_norm": 5.526493072509766, "learning_rate": 3.1341724137931034e-06, "loss": 0.3208, "step": 118275 }, { "epoch": 1.16, "grad_norm": 9.902225494384766, "learning_rate": 3.133741379310345e-06, "loss": 0.1619, "step": 118300 }, { "epoch": 1.16, "grad_norm": 3.5319254398345947, "learning_rate": 3.1333103448275863e-06, "loss": 0.3041, "step": 118325 }, { "epoch": 1.16, "grad_norm": 5.000514984130859, "learning_rate": 3.1328793103448278e-06, "loss": 0.1643, "step": 118350 }, { "epoch": 1.16, "grad_norm": 4.921228885650635, "learning_rate": 3.1324482758620696e-06, "loss": 0.2536, "step": 118375 }, { "epoch": 1.16, "grad_norm": 4.775254249572754, "learning_rate": 3.1320172413793103e-06, "loss": 0.1139, "step": 118400 }, { "epoch": 1.16, "grad_norm": 4.312560558319092, "learning_rate": 3.1315862068965517e-06, "loss": 0.3053, "step": 118425 }, { "epoch": 1.16, "grad_norm": 6.028812408447266, "learning_rate": 3.1311551724137936e-06, "loss": 0.1438, "step": 118450 }, { "epoch": 1.17, "grad_norm": 4.169440746307373, "learning_rate": 3.130724137931035e-06, "loss": 0.2883, "step": 118475 }, { "epoch": 1.17, "grad_norm": 6.799130916595459, "learning_rate": 3.1302931034482757e-06, "loss": 0.1348, "step": 118500 }, { "epoch": 1.17, "grad_norm": 3.585207462310791, "learning_rate": 3.1298620689655175e-06, "loss": 0.3191, "step": 118525 }, { "epoch": 1.17, "grad_norm": 8.088101387023926, "learning_rate": 3.129431034482759e-06, "loss": 0.1533, "step": 118550 }, { "epoch": 1.17, "grad_norm": 5.580173015594482, "learning_rate": 3.1290000000000005e-06, "loss": 0.3707, "step": 118575 }, { "epoch": 1.17, "grad_norm": 9.39868450164795, "learning_rate": 3.128568965517242e-06, "loss": 0.1656, "step": 118600 }, { "epoch": 1.17, "grad_norm": 4.9038519859313965, "learning_rate": 3.128137931034483e-06, "loss": 0.2542, "step": 118625 }, { "epoch": 1.17, "grad_norm": 7.434751987457275, "learning_rate": 3.1277068965517244e-06, "loss": 0.1612, "step": 118650 }, { "epoch": 1.17, "grad_norm": 4.378987789154053, "learning_rate": 3.127275862068966e-06, "loss": 0.2646, "step": 118675 }, { "epoch": 1.17, "grad_norm": 8.979499816894531, "learning_rate": 3.1268448275862073e-06, "loss": 0.1407, "step": 118700 }, { "epoch": 1.17, "grad_norm": 5.1946635246276855, "learning_rate": 3.1264137931034483e-06, "loss": 0.3038, "step": 118725 }, { "epoch": 1.17, "grad_norm": 7.842442512512207, "learning_rate": 3.12598275862069e-06, "loss": 0.1234, "step": 118750 }, { "epoch": 1.17, "grad_norm": 4.602941036224365, "learning_rate": 3.1255517241379313e-06, "loss": 0.2807, "step": 118775 }, { "epoch": 1.17, "grad_norm": 14.323200225830078, "learning_rate": 3.1251206896551727e-06, "loss": 0.1323, "step": 118800 }, { "epoch": 1.17, "grad_norm": 4.918025016784668, "learning_rate": 3.124689655172414e-06, "loss": 0.2795, "step": 118825 }, { "epoch": 1.17, "grad_norm": 4.374828338623047, "learning_rate": 3.124258620689655e-06, "loss": 0.1256, "step": 118850 }, { "epoch": 1.17, "grad_norm": 5.144373416900635, "learning_rate": 3.1238275862068967e-06, "loss": 0.3131, "step": 118875 }, { "epoch": 1.17, "grad_norm": 8.247993469238281, "learning_rate": 3.123396551724138e-06, "loss": 0.1523, "step": 118900 }, { "epoch": 1.17, "grad_norm": 7.994064807891846, "learning_rate": 3.1229655172413796e-06, "loss": 0.2584, "step": 118925 }, { "epoch": 1.17, "grad_norm": 6.295030117034912, "learning_rate": 3.1225344827586206e-06, "loss": 0.1545, "step": 118950 }, { "epoch": 1.17, "grad_norm": 4.295616149902344, "learning_rate": 3.122103448275862e-06, "loss": 0.2528, "step": 118975 }, { "epoch": 1.17, "grad_norm": 7.038844585418701, "learning_rate": 3.1216724137931035e-06, "loss": 0.1427, "step": 119000 }, { "epoch": 1.17, "grad_norm": 5.029229164123535, "learning_rate": 3.1212413793103454e-06, "loss": 0.3018, "step": 119025 }, { "epoch": 1.17, "grad_norm": 6.362788677215576, "learning_rate": 3.120810344827587e-06, "loss": 0.1519, "step": 119050 }, { "epoch": 1.17, "grad_norm": 5.066030502319336, "learning_rate": 3.1203793103448275e-06, "loss": 0.2679, "step": 119075 }, { "epoch": 1.17, "grad_norm": 7.490194797515869, "learning_rate": 3.1199482758620693e-06, "loss": 0.1341, "step": 119100 }, { "epoch": 1.17, "grad_norm": 3.4141416549682617, "learning_rate": 3.119517241379311e-06, "loss": 0.284, "step": 119125 }, { "epoch": 1.17, "grad_norm": 12.270493507385254, "learning_rate": 3.1190862068965523e-06, "loss": 0.1669, "step": 119150 }, { "epoch": 1.17, "grad_norm": 4.7608489990234375, "learning_rate": 3.1186551724137933e-06, "loss": 0.2967, "step": 119175 }, { "epoch": 1.17, "grad_norm": 9.739402770996094, "learning_rate": 3.1182241379310347e-06, "loss": 0.1321, "step": 119200 }, { "epoch": 1.17, "grad_norm": 4.249537467956543, "learning_rate": 3.117793103448276e-06, "loss": 0.2513, "step": 119225 }, { "epoch": 1.17, "grad_norm": 4.431980609893799, "learning_rate": 3.1173620689655177e-06, "loss": 0.1543, "step": 119250 }, { "epoch": 1.17, "grad_norm": 5.589651584625244, "learning_rate": 3.116931034482759e-06, "loss": 0.2721, "step": 119275 }, { "epoch": 1.17, "grad_norm": 8.1904878616333, "learning_rate": 3.1165e-06, "loss": 0.1527, "step": 119300 }, { "epoch": 1.17, "grad_norm": 4.348410606384277, "learning_rate": 3.1160689655172416e-06, "loss": 0.2603, "step": 119325 }, { "epoch": 1.17, "grad_norm": 10.836462020874023, "learning_rate": 3.115637931034483e-06, "loss": 0.1444, "step": 119350 }, { "epoch": 1.17, "grad_norm": 4.918074607849121, "learning_rate": 3.1152068965517245e-06, "loss": 0.3256, "step": 119375 }, { "epoch": 1.17, "grad_norm": 6.705283164978027, "learning_rate": 3.1147758620689655e-06, "loss": 0.1301, "step": 119400 }, { "epoch": 1.17, "grad_norm": 4.945502281188965, "learning_rate": 3.114344827586207e-06, "loss": 0.285, "step": 119425 }, { "epoch": 1.17, "grad_norm": 11.425603866577148, "learning_rate": 3.1139137931034485e-06, "loss": 0.1445, "step": 119450 }, { "epoch": 1.18, "grad_norm": 4.439545154571533, "learning_rate": 3.11348275862069e-06, "loss": 0.2797, "step": 119475 }, { "epoch": 1.18, "grad_norm": 12.63956069946289, "learning_rate": 3.1130517241379314e-06, "loss": 0.149, "step": 119500 }, { "epoch": 1.18, "grad_norm": 4.844807147979736, "learning_rate": 3.1126206896551724e-06, "loss": 0.3117, "step": 119525 }, { "epoch": 1.18, "grad_norm": 9.093160629272461, "learning_rate": 3.112189655172414e-06, "loss": 0.1315, "step": 119550 }, { "epoch": 1.18, "grad_norm": 4.1421895027160645, "learning_rate": 3.1117586206896553e-06, "loss": 0.2979, "step": 119575 }, { "epoch": 1.18, "grad_norm": 7.602053642272949, "learning_rate": 3.111327586206897e-06, "loss": 0.124, "step": 119600 }, { "epoch": 1.18, "grad_norm": 4.62301778793335, "learning_rate": 3.110896551724138e-06, "loss": 0.2749, "step": 119625 }, { "epoch": 1.18, "grad_norm": 10.042867660522461, "learning_rate": 3.1104655172413793e-06, "loss": 0.133, "step": 119650 }, { "epoch": 1.18, "grad_norm": 4.220522403717041, "learning_rate": 3.110034482758621e-06, "loss": 0.3181, "step": 119675 }, { "epoch": 1.18, "grad_norm": 7.178967475891113, "learning_rate": 3.1096034482758626e-06, "loss": 0.1532, "step": 119700 }, { "epoch": 1.18, "grad_norm": 5.285208702087402, "learning_rate": 3.109172413793104e-06, "loss": 0.3019, "step": 119725 }, { "epoch": 1.18, "grad_norm": 8.88916301727295, "learning_rate": 3.108741379310345e-06, "loss": 0.1385, "step": 119750 }, { "epoch": 1.18, "grad_norm": 2.868198871612549, "learning_rate": 3.108327586206897e-06, "loss": 0.2587, "step": 119775 }, { "epoch": 1.18, "grad_norm": 4.661443710327148, "learning_rate": 3.1078965517241384e-06, "loss": 0.1996, "step": 119800 }, { "epoch": 1.18, "grad_norm": 4.42000675201416, "learning_rate": 3.10746551724138e-06, "loss": 0.2951, "step": 119825 }, { "epoch": 1.18, "grad_norm": 4.238685607910156, "learning_rate": 3.107034482758621e-06, "loss": 0.1361, "step": 119850 }, { "epoch": 1.18, "grad_norm": 4.994475364685059, "learning_rate": 3.1066034482758624e-06, "loss": 0.3096, "step": 119875 }, { "epoch": 1.18, "grad_norm": 8.056527137756348, "learning_rate": 3.106172413793104e-06, "loss": 0.1259, "step": 119900 }, { "epoch": 1.18, "grad_norm": 3.4116177558898926, "learning_rate": 3.1057413793103453e-06, "loss": 0.3064, "step": 119925 }, { "epoch": 1.18, "grad_norm": 4.581530570983887, "learning_rate": 3.1053103448275863e-06, "loss": 0.1287, "step": 119950 }, { "epoch": 1.18, "grad_norm": 6.7066240310668945, "learning_rate": 3.1048793103448278e-06, "loss": 0.2495, "step": 119975 }, { "epoch": 1.18, "grad_norm": 8.391517639160156, "learning_rate": 3.1044482758620692e-06, "loss": 0.1565, "step": 120000 }, { "epoch": 1.18, "eval_loss": 0.5059551000595093, "eval_runtime": 5744.4902, "eval_samples_per_second": 1.648, "eval_steps_per_second": 0.206, "eval_wer": 0.1315586171377068, "step": 120000 }, { "epoch": 1.18, "grad_norm": 4.569921016693115, "learning_rate": 3.1040172413793107e-06, "loss": 0.2581, "step": 120025 }, { "epoch": 1.18, "grad_norm": 8.613563537597656, "learning_rate": 3.103586206896552e-06, "loss": 0.1404, "step": 120050 }, { "epoch": 1.18, "grad_norm": 4.06452751159668, "learning_rate": 3.103155172413793e-06, "loss": 0.2531, "step": 120075 }, { "epoch": 1.18, "grad_norm": 14.66318416595459, "learning_rate": 3.1027241379310346e-06, "loss": 0.1548, "step": 120100 }, { "epoch": 1.18, "grad_norm": 3.9715521335601807, "learning_rate": 3.102293103448276e-06, "loss": 0.2729, "step": 120125 }, { "epoch": 1.18, "grad_norm": 9.21590805053711, "learning_rate": 3.1018620689655176e-06, "loss": 0.145, "step": 120150 }, { "epoch": 1.18, "grad_norm": 3.991910696029663, "learning_rate": 3.1014310344827586e-06, "loss": 0.2895, "step": 120175 }, { "epoch": 1.18, "grad_norm": 11.877457618713379, "learning_rate": 3.101e-06, "loss": 0.136, "step": 120200 }, { "epoch": 1.18, "grad_norm": 4.134307384490967, "learning_rate": 3.1005689655172415e-06, "loss": 0.2453, "step": 120225 }, { "epoch": 1.18, "grad_norm": 15.570821762084961, "learning_rate": 3.100137931034483e-06, "loss": 0.1818, "step": 120250 }, { "epoch": 1.18, "grad_norm": 5.253293991088867, "learning_rate": 3.099706896551725e-06, "loss": 0.3136, "step": 120275 }, { "epoch": 1.18, "grad_norm": 11.756230354309082, "learning_rate": 3.0992758620689654e-06, "loss": 0.1423, "step": 120300 }, { "epoch": 1.18, "grad_norm": 5.113284111022949, "learning_rate": 3.098844827586207e-06, "loss": 0.3444, "step": 120325 }, { "epoch": 1.18, "grad_norm": 6.060307502746582, "learning_rate": 3.0984137931034488e-06, "loss": 0.1916, "step": 120350 }, { "epoch": 1.18, "grad_norm": 3.7674343585968018, "learning_rate": 3.0979827586206902e-06, "loss": 0.3067, "step": 120375 }, { "epoch": 1.18, "grad_norm": 11.178573608398438, "learning_rate": 3.097551724137931e-06, "loss": 0.122, "step": 120400 }, { "epoch": 1.18, "grad_norm": 4.694696426391602, "learning_rate": 3.0971206896551727e-06, "loss": 0.2642, "step": 120425 }, { "epoch": 1.18, "grad_norm": 6.609195709228516, "learning_rate": 3.096689655172414e-06, "loss": 0.1629, "step": 120450 }, { "epoch": 1.18, "grad_norm": 4.202245235443115, "learning_rate": 3.0962586206896556e-06, "loss": 0.2772, "step": 120475 }, { "epoch": 1.19, "grad_norm": 8.233917236328125, "learning_rate": 3.095827586206897e-06, "loss": 0.1487, "step": 120500 }, { "epoch": 1.19, "grad_norm": 5.532703399658203, "learning_rate": 3.095396551724138e-06, "loss": 0.3156, "step": 120525 }, { "epoch": 1.19, "grad_norm": 11.425307273864746, "learning_rate": 3.0949655172413796e-06, "loss": 0.171, "step": 120550 }, { "epoch": 1.19, "grad_norm": 4.562745571136475, "learning_rate": 3.094534482758621e-06, "loss": 0.263, "step": 120575 }, { "epoch": 1.19, "grad_norm": 12.369528770446777, "learning_rate": 3.0941034482758625e-06, "loss": 0.1391, "step": 120600 }, { "epoch": 1.19, "grad_norm": 6.971181869506836, "learning_rate": 3.0936724137931035e-06, "loss": 0.2313, "step": 120625 }, { "epoch": 1.19, "grad_norm": 13.086536407470703, "learning_rate": 3.093241379310345e-06, "loss": 0.1281, "step": 120650 }, { "epoch": 1.19, "grad_norm": 4.548788070678711, "learning_rate": 3.0928103448275864e-06, "loss": 0.2819, "step": 120675 }, { "epoch": 1.19, "grad_norm": 6.857945919036865, "learning_rate": 3.092379310344828e-06, "loss": 0.1641, "step": 120700 }, { "epoch": 1.19, "grad_norm": 4.590048313140869, "learning_rate": 3.0919482758620694e-06, "loss": 0.2741, "step": 120725 }, { "epoch": 1.19, "grad_norm": 4.694228172302246, "learning_rate": 3.0915172413793104e-06, "loss": 0.1291, "step": 120750 }, { "epoch": 1.19, "grad_norm": 3.9550352096557617, "learning_rate": 3.091086206896552e-06, "loss": 0.3088, "step": 120775 }, { "epoch": 1.19, "grad_norm": 10.369668960571289, "learning_rate": 3.0906551724137933e-06, "loss": 0.1415, "step": 120800 }, { "epoch": 1.19, "grad_norm": 6.4453630447387695, "learning_rate": 3.0902241379310348e-06, "loss": 0.2873, "step": 120825 }, { "epoch": 1.19, "grad_norm": 8.15467357635498, "learning_rate": 3.089793103448276e-06, "loss": 0.1223, "step": 120850 }, { "epoch": 1.19, "grad_norm": 4.6509785652160645, "learning_rate": 3.0893620689655172e-06, "loss": 0.2963, "step": 120875 }, { "epoch": 1.19, "grad_norm": 6.248692035675049, "learning_rate": 3.0889310344827587e-06, "loss": 0.1563, "step": 120900 }, { "epoch": 1.19, "grad_norm": 4.325538158416748, "learning_rate": 3.0885000000000006e-06, "loss": 0.3371, "step": 120925 }, { "epoch": 1.19, "grad_norm": 8.016426086425781, "learning_rate": 3.088068965517242e-06, "loss": 0.151, "step": 120950 }, { "epoch": 1.19, "grad_norm": 5.104395389556885, "learning_rate": 3.0876379310344826e-06, "loss": 0.3081, "step": 120975 }, { "epoch": 1.19, "grad_norm": 4.489412784576416, "learning_rate": 3.0872068965517245e-06, "loss": 0.1354, "step": 121000 }, { "epoch": 1.19, "grad_norm": 4.021217346191406, "learning_rate": 3.086775862068966e-06, "loss": 0.2549, "step": 121025 }, { "epoch": 1.19, "grad_norm": 9.82274055480957, "learning_rate": 3.0863448275862074e-06, "loss": 0.181, "step": 121050 }, { "epoch": 1.19, "grad_norm": 5.694716930389404, "learning_rate": 3.0859137931034485e-06, "loss": 0.3221, "step": 121075 }, { "epoch": 1.19, "grad_norm": 10.541130065917969, "learning_rate": 3.08548275862069e-06, "loss": 0.1221, "step": 121100 }, { "epoch": 1.19, "grad_norm": 5.427818298339844, "learning_rate": 3.0850517241379314e-06, "loss": 0.3355, "step": 121125 }, { "epoch": 1.19, "grad_norm": 18.127674102783203, "learning_rate": 3.084620689655173e-06, "loss": 0.15, "step": 121150 }, { "epoch": 1.19, "grad_norm": 5.20603609085083, "learning_rate": 3.0841896551724143e-06, "loss": 0.3032, "step": 121175 }, { "epoch": 1.19, "grad_norm": 12.605658531188965, "learning_rate": 3.0837586206896553e-06, "loss": 0.1385, "step": 121200 }, { "epoch": 1.19, "grad_norm": 5.475026607513428, "learning_rate": 3.083327586206897e-06, "loss": 0.3303, "step": 121225 }, { "epoch": 1.19, "grad_norm": 7.6491618156433105, "learning_rate": 3.0828965517241382e-06, "loss": 0.1499, "step": 121250 }, { "epoch": 1.19, "grad_norm": 7.1178107261657715, "learning_rate": 3.0824655172413797e-06, "loss": 0.29, "step": 121275 }, { "epoch": 1.19, "grad_norm": 5.592559814453125, "learning_rate": 3.0820344827586207e-06, "loss": 0.1498, "step": 121300 }, { "epoch": 1.19, "grad_norm": 4.342351913452148, "learning_rate": 3.081603448275862e-06, "loss": 0.3017, "step": 121325 }, { "epoch": 1.19, "grad_norm": 4.838769912719727, "learning_rate": 3.0811724137931036e-06, "loss": 0.1388, "step": 121350 }, { "epoch": 1.19, "grad_norm": 6.368029594421387, "learning_rate": 3.080741379310345e-06, "loss": 0.2343, "step": 121375 }, { "epoch": 1.19, "grad_norm": 2.680541753768921, "learning_rate": 3.0803103448275866e-06, "loss": 0.1181, "step": 121400 }, { "epoch": 1.19, "grad_norm": 3.644279718399048, "learning_rate": 3.0798793103448276e-06, "loss": 0.2649, "step": 121425 }, { "epoch": 1.19, "grad_norm": 1.1683913469314575, "learning_rate": 3.079448275862069e-06, "loss": 0.149, "step": 121450 }, { "epoch": 1.19, "grad_norm": 4.3739752769470215, "learning_rate": 3.0790172413793105e-06, "loss": 0.2869, "step": 121475 }, { "epoch": 1.19, "grad_norm": 1.3023749589920044, "learning_rate": 3.0785862068965524e-06, "loss": 0.1187, "step": 121500 }, { "epoch": 1.2, "grad_norm": 4.8032965660095215, "learning_rate": 3.078155172413793e-06, "loss": 0.2744, "step": 121525 }, { "epoch": 1.2, "grad_norm": 10.287053108215332, "learning_rate": 3.0777241379310345e-06, "loss": 0.1734, "step": 121550 }, { "epoch": 1.2, "grad_norm": 3.741650342941284, "learning_rate": 3.0772931034482763e-06, "loss": 0.2771, "step": 121575 }, { "epoch": 1.2, "grad_norm": 6.559027194976807, "learning_rate": 3.076862068965518e-06, "loss": 0.1148, "step": 121600 }, { "epoch": 1.2, "grad_norm": 4.119940280914307, "learning_rate": 3.0764310344827592e-06, "loss": 0.3233, "step": 121625 }, { "epoch": 1.2, "grad_norm": 8.0961275100708, "learning_rate": 3.0760000000000003e-06, "loss": 0.1529, "step": 121650 }, { "epoch": 1.2, "grad_norm": 5.235069274902344, "learning_rate": 3.0755689655172417e-06, "loss": 0.3132, "step": 121675 }, { "epoch": 1.2, "grad_norm": 9.68993091583252, "learning_rate": 3.075137931034483e-06, "loss": 0.1578, "step": 121700 }, { "epoch": 1.2, "grad_norm": 4.360131740570068, "learning_rate": 3.0747068965517246e-06, "loss": 0.317, "step": 121725 }, { "epoch": 1.2, "grad_norm": 7.534379482269287, "learning_rate": 3.0742758620689657e-06, "loss": 0.14, "step": 121750 }, { "epoch": 1.2, "grad_norm": 7.114737510681152, "learning_rate": 3.073844827586207e-06, "loss": 0.2543, "step": 121775 }, { "epoch": 1.2, "grad_norm": 10.486727714538574, "learning_rate": 3.0734137931034486e-06, "loss": 0.1366, "step": 121800 }, { "epoch": 1.2, "grad_norm": 4.99083137512207, "learning_rate": 3.07298275862069e-06, "loss": 0.3285, "step": 121825 }, { "epoch": 1.2, "grad_norm": 5.622518062591553, "learning_rate": 3.072551724137931e-06, "loss": 0.1628, "step": 121850 }, { "epoch": 1.2, "grad_norm": 5.161464214324951, "learning_rate": 3.072137931034483e-06, "loss": 0.274, "step": 121875 }, { "epoch": 1.2, "grad_norm": 6.141162872314453, "learning_rate": 3.0717068965517244e-06, "loss": 0.1622, "step": 121900 }, { "epoch": 1.2, "grad_norm": 3.869520425796509, "learning_rate": 3.071275862068966e-06, "loss": 0.2438, "step": 121925 }, { "epoch": 1.2, "grad_norm": 11.604798316955566, "learning_rate": 3.0708448275862073e-06, "loss": 0.1298, "step": 121950 }, { "epoch": 1.2, "grad_norm": 4.652565956115723, "learning_rate": 3.0704137931034484e-06, "loss": 0.2822, "step": 121975 }, { "epoch": 1.2, "grad_norm": 10.899847030639648, "learning_rate": 3.06998275862069e-06, "loss": 0.1704, "step": 122000 }, { "epoch": 1.2, "grad_norm": 5.213729381561279, "learning_rate": 3.0695517241379313e-06, "loss": 0.2685, "step": 122025 }, { "epoch": 1.2, "grad_norm": 8.090422630310059, "learning_rate": 3.0691206896551727e-06, "loss": 0.1374, "step": 122050 }, { "epoch": 1.2, "grad_norm": 6.4349894523620605, "learning_rate": 3.0686896551724138e-06, "loss": 0.3337, "step": 122075 }, { "epoch": 1.2, "grad_norm": 10.420483589172363, "learning_rate": 3.0682586206896552e-06, "loss": 0.1644, "step": 122100 }, { "epoch": 1.2, "grad_norm": 3.5164427757263184, "learning_rate": 3.0678275862068967e-06, "loss": 0.257, "step": 122125 }, { "epoch": 1.2, "grad_norm": 7.004861354827881, "learning_rate": 3.067396551724138e-06, "loss": 0.1441, "step": 122150 }, { "epoch": 1.2, "grad_norm": 5.553203105926514, "learning_rate": 3.0669655172413796e-06, "loss": 0.287, "step": 122175 }, { "epoch": 1.2, "grad_norm": 5.816309928894043, "learning_rate": 3.0665344827586206e-06, "loss": 0.1479, "step": 122200 }, { "epoch": 1.2, "grad_norm": 4.0763773918151855, "learning_rate": 3.066103448275862e-06, "loss": 0.3431, "step": 122225 }, { "epoch": 1.2, "grad_norm": 11.828118324279785, "learning_rate": 3.065672413793104e-06, "loss": 0.1551, "step": 122250 }, { "epoch": 1.2, "grad_norm": 5.411355018615723, "learning_rate": 3.0652413793103454e-06, "loss": 0.3034, "step": 122275 }, { "epoch": 1.2, "grad_norm": 12.211695671081543, "learning_rate": 3.064810344827586e-06, "loss": 0.1202, "step": 122300 }, { "epoch": 1.2, "grad_norm": 4.597732067108154, "learning_rate": 3.064379310344828e-06, "loss": 0.3591, "step": 122325 }, { "epoch": 1.2, "grad_norm": 6.279448986053467, "learning_rate": 3.0639482758620694e-06, "loss": 0.1332, "step": 122350 }, { "epoch": 1.2, "grad_norm": 4.4544291496276855, "learning_rate": 3.063517241379311e-06, "loss": 0.2848, "step": 122375 }, { "epoch": 1.2, "grad_norm": 4.014398574829102, "learning_rate": 3.0630862068965523e-06, "loss": 0.1466, "step": 122400 }, { "epoch": 1.2, "grad_norm": 5.039041042327881, "learning_rate": 3.0626551724137933e-06, "loss": 0.3203, "step": 122425 }, { "epoch": 1.2, "grad_norm": 12.955848693847656, "learning_rate": 3.0622241379310348e-06, "loss": 0.1449, "step": 122450 }, { "epoch": 1.2, "grad_norm": 3.6886911392211914, "learning_rate": 3.0617931034482762e-06, "loss": 0.3053, "step": 122475 }, { "epoch": 1.2, "grad_norm": 5.991684436798096, "learning_rate": 3.0613620689655177e-06, "loss": 0.1435, "step": 122500 }, { "epoch": 1.21, "grad_norm": 3.347503900527954, "learning_rate": 3.0609310344827587e-06, "loss": 0.2509, "step": 122525 }, { "epoch": 1.21, "grad_norm": 5.064601898193359, "learning_rate": 3.0605e-06, "loss": 0.166, "step": 122550 }, { "epoch": 1.21, "grad_norm": 5.288036823272705, "learning_rate": 3.0600689655172416e-06, "loss": 0.2768, "step": 122575 }, { "epoch": 1.21, "grad_norm": 8.742223739624023, "learning_rate": 3.059637931034483e-06, "loss": 0.1411, "step": 122600 }, { "epoch": 1.21, "grad_norm": 4.346393585205078, "learning_rate": 3.0592068965517245e-06, "loss": 0.2895, "step": 122625 }, { "epoch": 1.21, "grad_norm": 7.800521373748779, "learning_rate": 3.0587758620689656e-06, "loss": 0.1203, "step": 122650 }, { "epoch": 1.21, "grad_norm": 5.620489597320557, "learning_rate": 3.058344827586207e-06, "loss": 0.2603, "step": 122675 }, { "epoch": 1.21, "grad_norm": 5.828588962554932, "learning_rate": 3.0579137931034485e-06, "loss": 0.1282, "step": 122700 }, { "epoch": 1.21, "grad_norm": 3.0307536125183105, "learning_rate": 3.05748275862069e-06, "loss": 0.2475, "step": 122725 }, { "epoch": 1.21, "grad_norm": 8.763359069824219, "learning_rate": 3.057051724137931e-06, "loss": 0.1548, "step": 122750 }, { "epoch": 1.21, "grad_norm": 4.693431854248047, "learning_rate": 3.0566206896551724e-06, "loss": 0.2722, "step": 122775 }, { "epoch": 1.21, "grad_norm": 8.99606704711914, "learning_rate": 3.056189655172414e-06, "loss": 0.1372, "step": 122800 }, { "epoch": 1.21, "grad_norm": 3.56937837600708, "learning_rate": 3.0557586206896558e-06, "loss": 0.3126, "step": 122825 }, { "epoch": 1.21, "grad_norm": 5.7159247398376465, "learning_rate": 3.0553275862068972e-06, "loss": 0.1287, "step": 122850 }, { "epoch": 1.21, "grad_norm": 6.197281837463379, "learning_rate": 3.054896551724138e-06, "loss": 0.2665, "step": 122875 }, { "epoch": 1.21, "grad_norm": 9.763036727905273, "learning_rate": 3.0544655172413797e-06, "loss": 0.1496, "step": 122900 }, { "epoch": 1.21, "grad_norm": 4.432664394378662, "learning_rate": 3.054034482758621e-06, "loss": 0.2655, "step": 122925 }, { "epoch": 1.21, "grad_norm": 7.8362717628479, "learning_rate": 3.0536034482758626e-06, "loss": 0.1408, "step": 122950 }, { "epoch": 1.21, "grad_norm": 5.73299503326416, "learning_rate": 3.0531724137931037e-06, "loss": 0.2681, "step": 122975 }, { "epoch": 1.21, "grad_norm": 15.929830551147461, "learning_rate": 3.052741379310345e-06, "loss": 0.1348, "step": 123000 }, { "epoch": 1.21, "grad_norm": 5.149907112121582, "learning_rate": 3.0523103448275866e-06, "loss": 0.2945, "step": 123025 }, { "epoch": 1.21, "grad_norm": 8.993551254272461, "learning_rate": 3.051879310344828e-06, "loss": 0.1877, "step": 123050 }, { "epoch": 1.21, "grad_norm": 4.685800075531006, "learning_rate": 3.0514482758620695e-06, "loss": 0.2975, "step": 123075 }, { "epoch": 1.21, "grad_norm": 4.903399467468262, "learning_rate": 3.0510172413793105e-06, "loss": 0.1619, "step": 123100 }, { "epoch": 1.21, "grad_norm": 3.497797727584839, "learning_rate": 3.050586206896552e-06, "loss": 0.2722, "step": 123125 }, { "epoch": 1.21, "grad_norm": 5.650899410247803, "learning_rate": 3.0501551724137934e-06, "loss": 0.1628, "step": 123150 }, { "epoch": 1.21, "grad_norm": 3.4803314208984375, "learning_rate": 3.049724137931035e-06, "loss": 0.2788, "step": 123175 }, { "epoch": 1.21, "grad_norm": 11.340283393859863, "learning_rate": 3.049293103448276e-06, "loss": 0.1429, "step": 123200 }, { "epoch": 1.21, "grad_norm": 4.803812026977539, "learning_rate": 3.0488620689655174e-06, "loss": 0.2296, "step": 123225 }, { "epoch": 1.21, "grad_norm": 9.535140991210938, "learning_rate": 3.048431034482759e-06, "loss": 0.137, "step": 123250 }, { "epoch": 1.21, "grad_norm": 3.6139750480651855, "learning_rate": 3.0480000000000003e-06, "loss": 0.2658, "step": 123275 }, { "epoch": 1.21, "grad_norm": 7.151445388793945, "learning_rate": 3.0475689655172413e-06, "loss": 0.1439, "step": 123300 }, { "epoch": 1.21, "grad_norm": 4.356892108917236, "learning_rate": 3.0471379310344828e-06, "loss": 0.304, "step": 123325 }, { "epoch": 1.21, "grad_norm": 9.701735496520996, "learning_rate": 3.0467068965517242e-06, "loss": 0.1508, "step": 123350 }, { "epoch": 1.21, "grad_norm": 3.9916703701019287, "learning_rate": 3.0462758620689657e-06, "loss": 0.2745, "step": 123375 }, { "epoch": 1.21, "grad_norm": 5.6181230545043945, "learning_rate": 3.045844827586207e-06, "loss": 0.1234, "step": 123400 }, { "epoch": 1.21, "grad_norm": 4.836647033691406, "learning_rate": 3.045413793103448e-06, "loss": 0.3293, "step": 123425 }, { "epoch": 1.21, "grad_norm": 10.500590324401855, "learning_rate": 3.0449827586206896e-06, "loss": 0.1317, "step": 123450 }, { "epoch": 1.21, "grad_norm": 6.478087902069092, "learning_rate": 3.0445517241379315e-06, "loss": 0.3311, "step": 123475 }, { "epoch": 1.21, "grad_norm": 13.461045265197754, "learning_rate": 3.044120689655173e-06, "loss": 0.1417, "step": 123500 }, { "epoch": 1.21, "grad_norm": 3.7614285945892334, "learning_rate": 3.0436896551724136e-06, "loss": 0.3354, "step": 123525 }, { "epoch": 1.22, "grad_norm": 10.421733856201172, "learning_rate": 3.0432586206896555e-06, "loss": 0.1628, "step": 123550 }, { "epoch": 1.22, "grad_norm": 3.880204439163208, "learning_rate": 3.042827586206897e-06, "loss": 0.3118, "step": 123575 }, { "epoch": 1.22, "grad_norm": 7.867882251739502, "learning_rate": 3.0423965517241384e-06, "loss": 0.1305, "step": 123600 }, { "epoch": 1.22, "grad_norm": 4.915421009063721, "learning_rate": 3.04196551724138e-06, "loss": 0.239, "step": 123625 }, { "epoch": 1.22, "grad_norm": 5.759435176849365, "learning_rate": 3.041534482758621e-06, "loss": 0.12, "step": 123650 }, { "epoch": 1.22, "grad_norm": 5.648083209991455, "learning_rate": 3.0411034482758623e-06, "loss": 0.3004, "step": 123675 }, { "epoch": 1.22, "grad_norm": 9.98023509979248, "learning_rate": 3.0406724137931038e-06, "loss": 0.1654, "step": 123700 }, { "epoch": 1.22, "grad_norm": 8.636151313781738, "learning_rate": 3.0402413793103452e-06, "loss": 0.3178, "step": 123725 }, { "epoch": 1.22, "grad_norm": 7.917018890380859, "learning_rate": 3.0398103448275863e-06, "loss": 0.129, "step": 123750 }, { "epoch": 1.22, "grad_norm": 4.305187702178955, "learning_rate": 3.0393793103448277e-06, "loss": 0.2983, "step": 123775 }, { "epoch": 1.22, "grad_norm": 6.878952503204346, "learning_rate": 3.038948275862069e-06, "loss": 0.1064, "step": 123800 }, { "epoch": 1.22, "grad_norm": 5.513192176818848, "learning_rate": 3.0385172413793106e-06, "loss": 0.3367, "step": 123825 }, { "epoch": 1.22, "grad_norm": 8.626699447631836, "learning_rate": 3.038086206896552e-06, "loss": 0.1579, "step": 123850 }, { "epoch": 1.22, "grad_norm": 5.334062576293945, "learning_rate": 3.037655172413793e-06, "loss": 0.3075, "step": 123875 }, { "epoch": 1.22, "grad_norm": 12.050763130187988, "learning_rate": 3.0372241379310346e-06, "loss": 0.1643, "step": 123900 }, { "epoch": 1.22, "grad_norm": 3.3931686878204346, "learning_rate": 3.036793103448276e-06, "loss": 0.2623, "step": 123925 }, { "epoch": 1.22, "grad_norm": 4.034700393676758, "learning_rate": 3.0363620689655175e-06, "loss": 0.1461, "step": 123950 }, { "epoch": 1.22, "grad_norm": 6.373013019561768, "learning_rate": 3.0359310344827585e-06, "loss": 0.2437, "step": 123975 }, { "epoch": 1.22, "grad_norm": 12.259376525878906, "learning_rate": 3.0355e-06, "loss": 0.1575, "step": 124000 }, { "epoch": 1.22, "grad_norm": 4.803765773773193, "learning_rate": 3.0350689655172414e-06, "loss": 0.3128, "step": 124025 }, { "epoch": 1.22, "grad_norm": 11.045470237731934, "learning_rate": 3.0346379310344833e-06, "loss": 0.1637, "step": 124050 }, { "epoch": 1.22, "grad_norm": 5.253839492797852, "learning_rate": 3.0342241379310348e-06, "loss": 0.3058, "step": 124075 }, { "epoch": 1.22, "grad_norm": 2.867600679397583, "learning_rate": 3.033793103448276e-06, "loss": 0.1198, "step": 124100 }, { "epoch": 1.22, "grad_norm": 4.126093864440918, "learning_rate": 3.0333620689655173e-06, "loss": 0.2962, "step": 124125 }, { "epoch": 1.22, "grad_norm": 5.661530494689941, "learning_rate": 3.0329310344827587e-06, "loss": 0.1713, "step": 124150 }, { "epoch": 1.22, "grad_norm": 3.9378740787506104, "learning_rate": 3.0325000000000006e-06, "loss": 0.2553, "step": 124175 }, { "epoch": 1.22, "grad_norm": 7.84490966796875, "learning_rate": 3.0320689655172412e-06, "loss": 0.1373, "step": 124200 }, { "epoch": 1.22, "grad_norm": 5.475949764251709, "learning_rate": 3.031637931034483e-06, "loss": 0.2465, "step": 124225 }, { "epoch": 1.22, "grad_norm": 8.371650695800781, "learning_rate": 3.0312068965517246e-06, "loss": 0.1307, "step": 124250 }, { "epoch": 1.22, "grad_norm": 4.363217353820801, "learning_rate": 3.030775862068966e-06, "loss": 0.3014, "step": 124275 }, { "epoch": 1.22, "grad_norm": 11.904874801635742, "learning_rate": 3.0303448275862075e-06, "loss": 0.1761, "step": 124300 }, { "epoch": 1.22, "grad_norm": 5.55465030670166, "learning_rate": 3.0299137931034485e-06, "loss": 0.2856, "step": 124325 }, { "epoch": 1.22, "grad_norm": 9.062211036682129, "learning_rate": 3.02948275862069e-06, "loss": 0.1621, "step": 124350 }, { "epoch": 1.22, "grad_norm": 5.562146186828613, "learning_rate": 3.0290517241379314e-06, "loss": 0.309, "step": 124375 }, { "epoch": 1.22, "grad_norm": 8.747289657592773, "learning_rate": 3.028620689655173e-06, "loss": 0.1329, "step": 124400 }, { "epoch": 1.22, "grad_norm": 5.271026611328125, "learning_rate": 3.028189655172414e-06, "loss": 0.2839, "step": 124425 }, { "epoch": 1.22, "grad_norm": 9.766793251037598, "learning_rate": 3.0277586206896554e-06, "loss": 0.1166, "step": 124450 }, { "epoch": 1.22, "grad_norm": 6.898532867431641, "learning_rate": 3.027327586206897e-06, "loss": 0.2842, "step": 124475 }, { "epoch": 1.22, "grad_norm": 11.942578315734863, "learning_rate": 3.0268965517241383e-06, "loss": 0.1349, "step": 124500 }, { "epoch": 1.22, "grad_norm": 3.9086344242095947, "learning_rate": 3.0264655172413797e-06, "loss": 0.3117, "step": 124525 }, { "epoch": 1.22, "grad_norm": 14.045716285705566, "learning_rate": 3.0260344827586208e-06, "loss": 0.1298, "step": 124550 }, { "epoch": 1.23, "grad_norm": 4.554497718811035, "learning_rate": 3.0256034482758622e-06, "loss": 0.3113, "step": 124575 }, { "epoch": 1.23, "grad_norm": 6.686511516571045, "learning_rate": 3.0251724137931037e-06, "loss": 0.1561, "step": 124600 }, { "epoch": 1.23, "grad_norm": 4.241798400878906, "learning_rate": 3.024741379310345e-06, "loss": 0.3091, "step": 124625 }, { "epoch": 1.23, "grad_norm": 11.398628234863281, "learning_rate": 3.024310344827586e-06, "loss": 0.1202, "step": 124650 }, { "epoch": 1.23, "grad_norm": 5.223613739013672, "learning_rate": 3.0238793103448276e-06, "loss": 0.2726, "step": 124675 }, { "epoch": 1.23, "grad_norm": 6.08660364151001, "learning_rate": 3.023448275862069e-06, "loss": 0.148, "step": 124700 }, { "epoch": 1.23, "grad_norm": 4.0325164794921875, "learning_rate": 3.0230172413793105e-06, "loss": 0.2809, "step": 124725 }, { "epoch": 1.23, "grad_norm": 7.539151668548584, "learning_rate": 3.0225862068965524e-06, "loss": 0.1772, "step": 124750 }, { "epoch": 1.23, "grad_norm": 4.90236759185791, "learning_rate": 3.022155172413793e-06, "loss": 0.293, "step": 124775 }, { "epoch": 1.23, "grad_norm": 5.997454643249512, "learning_rate": 3.021724137931035e-06, "loss": 0.1394, "step": 124800 }, { "epoch": 1.23, "grad_norm": 3.866593360900879, "learning_rate": 3.0212931034482764e-06, "loss": 0.3111, "step": 124825 }, { "epoch": 1.23, "grad_norm": 6.540928840637207, "learning_rate": 3.020862068965518e-06, "loss": 0.1305, "step": 124850 }, { "epoch": 1.23, "grad_norm": 5.320431232452393, "learning_rate": 3.020431034482759e-06, "loss": 0.2653, "step": 124875 }, { "epoch": 1.23, "grad_norm": 6.648142337799072, "learning_rate": 3.0200000000000003e-06, "loss": 0.1289, "step": 124900 }, { "epoch": 1.23, "grad_norm": 5.335725784301758, "learning_rate": 3.0195689655172418e-06, "loss": 0.2297, "step": 124925 }, { "epoch": 1.23, "grad_norm": 13.365460395812988, "learning_rate": 3.0191379310344832e-06, "loss": 0.1388, "step": 124950 }, { "epoch": 1.23, "grad_norm": 4.333001136779785, "learning_rate": 3.0187068965517243e-06, "loss": 0.2709, "step": 124975 }, { "epoch": 1.23, "grad_norm": 6.484216690063477, "learning_rate": 3.0182758620689657e-06, "loss": 0.1543, "step": 125000 }, { "epoch": 1.23, "grad_norm": 3.774278402328491, "learning_rate": 3.017844827586207e-06, "loss": 0.2846, "step": 125025 }, { "epoch": 1.23, "grad_norm": 6.089615345001221, "learning_rate": 3.0174137931034486e-06, "loss": 0.1255, "step": 125050 }, { "epoch": 1.23, "grad_norm": 9.83164119720459, "learning_rate": 3.01698275862069e-06, "loss": 0.2608, "step": 125075 }, { "epoch": 1.23, "grad_norm": 5.745550155639648, "learning_rate": 3.016551724137931e-06, "loss": 0.1218, "step": 125100 }, { "epoch": 1.23, "grad_norm": 5.343557834625244, "learning_rate": 3.0161206896551726e-06, "loss": 0.2967, "step": 125125 }, { "epoch": 1.23, "grad_norm": 13.127808570861816, "learning_rate": 3.015689655172414e-06, "loss": 0.1559, "step": 125150 }, { "epoch": 1.23, "grad_norm": 4.344359874725342, "learning_rate": 3.0152586206896555e-06, "loss": 0.3036, "step": 125175 }, { "epoch": 1.23, "grad_norm": 5.426344394683838, "learning_rate": 3.0148275862068965e-06, "loss": 0.1616, "step": 125200 }, { "epoch": 1.23, "grad_norm": 4.222601413726807, "learning_rate": 3.014396551724138e-06, "loss": 0.2666, "step": 125225 }, { "epoch": 1.23, "grad_norm": 8.900943756103516, "learning_rate": 3.0139655172413794e-06, "loss": 0.1619, "step": 125250 }, { "epoch": 1.23, "grad_norm": 8.23507022857666, "learning_rate": 3.013534482758621e-06, "loss": 0.2792, "step": 125275 }, { "epoch": 1.23, "grad_norm": 2.464251756668091, "learning_rate": 3.0131034482758623e-06, "loss": 0.1568, "step": 125300 }, { "epoch": 1.23, "grad_norm": 4.142052173614502, "learning_rate": 3.0126724137931034e-06, "loss": 0.2797, "step": 125325 }, { "epoch": 1.23, "grad_norm": 3.607693672180176, "learning_rate": 3.012241379310345e-06, "loss": 0.1477, "step": 125350 }, { "epoch": 1.23, "grad_norm": 4.110520839691162, "learning_rate": 3.0118103448275863e-06, "loss": 0.251, "step": 125375 }, { "epoch": 1.23, "grad_norm": 3.7511377334594727, "learning_rate": 3.011379310344828e-06, "loss": 0.1657, "step": 125400 }, { "epoch": 1.23, "grad_norm": 4.060960292816162, "learning_rate": 3.0109482758620688e-06, "loss": 0.2818, "step": 125425 }, { "epoch": 1.23, "grad_norm": 12.276758193969727, "learning_rate": 3.0105172413793107e-06, "loss": 0.1485, "step": 125450 }, { "epoch": 1.23, "grad_norm": 4.7591094970703125, "learning_rate": 3.010086206896552e-06, "loss": 0.25, "step": 125475 }, { "epoch": 1.23, "grad_norm": 5.444262504577637, "learning_rate": 3.0096551724137936e-06, "loss": 0.1556, "step": 125500 }, { "epoch": 1.23, "grad_norm": 3.986522674560547, "learning_rate": 3.009224137931035e-06, "loss": 0.2252, "step": 125525 }, { "epoch": 1.23, "grad_norm": 8.205169677734375, "learning_rate": 3.008793103448276e-06, "loss": 0.1565, "step": 125550 }, { "epoch": 1.24, "grad_norm": 11.886555671691895, "learning_rate": 3.0083620689655175e-06, "loss": 0.2696, "step": 125575 }, { "epoch": 1.24, "grad_norm": 8.645598411560059, "learning_rate": 3.007931034482759e-06, "loss": 0.1545, "step": 125600 }, { "epoch": 1.24, "grad_norm": 5.485776901245117, "learning_rate": 3.0075000000000004e-06, "loss": 0.2822, "step": 125625 }, { "epoch": 1.24, "grad_norm": 27.60085105895996, "learning_rate": 3.0070689655172415e-06, "loss": 0.1498, "step": 125650 }, { "epoch": 1.24, "grad_norm": 5.241468906402588, "learning_rate": 3.006637931034483e-06, "loss": 0.3109, "step": 125675 }, { "epoch": 1.24, "grad_norm": 12.018869400024414, "learning_rate": 3.0062068965517244e-06, "loss": 0.123, "step": 125700 }, { "epoch": 1.24, "grad_norm": 5.986661911010742, "learning_rate": 3.005775862068966e-06, "loss": 0.3334, "step": 125725 }, { "epoch": 1.24, "grad_norm": 13.173177719116211, "learning_rate": 3.0053448275862073e-06, "loss": 0.1586, "step": 125750 }, { "epoch": 1.24, "grad_norm": 5.551051139831543, "learning_rate": 3.0049137931034483e-06, "loss": 0.2742, "step": 125775 }, { "epoch": 1.24, "grad_norm": 8.387846946716309, "learning_rate": 3.0044827586206898e-06, "loss": 0.1126, "step": 125800 }, { "epoch": 1.24, "grad_norm": 5.278562545776367, "learning_rate": 3.0040517241379312e-06, "loss": 0.295, "step": 125825 }, { "epoch": 1.24, "grad_norm": 9.359222412109375, "learning_rate": 3.0036206896551727e-06, "loss": 0.1559, "step": 125850 }, { "epoch": 1.24, "grad_norm": 4.026912689208984, "learning_rate": 3.0031896551724137e-06, "loss": 0.2996, "step": 125875 }, { "epoch": 1.24, "grad_norm": 9.070088386535645, "learning_rate": 3.002758620689655e-06, "loss": 0.2013, "step": 125900 }, { "epoch": 1.24, "grad_norm": 3.624222993850708, "learning_rate": 3.0023275862068966e-06, "loss": 0.3007, "step": 125925 }, { "epoch": 1.24, "grad_norm": 4.708890914916992, "learning_rate": 3.001896551724138e-06, "loss": 0.1625, "step": 125950 }, { "epoch": 1.24, "grad_norm": 4.708754539489746, "learning_rate": 3.00146551724138e-06, "loss": 0.2929, "step": 125975 }, { "epoch": 1.24, "grad_norm": 9.940010070800781, "learning_rate": 3.0010344827586206e-06, "loss": 0.1405, "step": 126000 }, { "epoch": 1.24, "grad_norm": 4.958669662475586, "learning_rate": 3.0006034482758625e-06, "loss": 0.3131, "step": 126025 }, { "epoch": 1.24, "grad_norm": 11.055444717407227, "learning_rate": 3.000172413793104e-06, "loss": 0.1702, "step": 126050 }, { "epoch": 1.24, "grad_norm": 3.8382458686828613, "learning_rate": 2.9997413793103454e-06, "loss": 0.3022, "step": 126075 }, { "epoch": 1.24, "grad_norm": 10.36175537109375, "learning_rate": 2.9993103448275864e-06, "loss": 0.1173, "step": 126100 }, { "epoch": 1.24, "grad_norm": 4.744218826293945, "learning_rate": 2.998879310344828e-06, "loss": 0.2625, "step": 126125 }, { "epoch": 1.24, "grad_norm": 5.764126300811768, "learning_rate": 2.9984482758620693e-06, "loss": 0.1573, "step": 126150 }, { "epoch": 1.24, "grad_norm": 4.158679008483887, "learning_rate": 2.9980172413793108e-06, "loss": 0.2697, "step": 126175 }, { "epoch": 1.24, "grad_norm": 16.601808547973633, "learning_rate": 2.9975862068965522e-06, "loss": 0.1848, "step": 126200 }, { "epoch": 1.24, "grad_norm": 3.9806244373321533, "learning_rate": 2.9971551724137933e-06, "loss": 0.2723, "step": 126225 }, { "epoch": 1.24, "grad_norm": 8.717184066772461, "learning_rate": 2.9967241379310347e-06, "loss": 0.1358, "step": 126250 }, { "epoch": 1.24, "grad_norm": 3.733569860458374, "learning_rate": 2.996293103448276e-06, "loss": 0.3054, "step": 126275 }, { "epoch": 1.24, "grad_norm": 10.796332359313965, "learning_rate": 2.9958620689655176e-06, "loss": 0.1795, "step": 126300 }, { "epoch": 1.24, "grad_norm": 5.957956790924072, "learning_rate": 2.9954310344827587e-06, "loss": 0.2654, "step": 126325 }, { "epoch": 1.24, "grad_norm": 5.42051362991333, "learning_rate": 2.995e-06, "loss": 0.1448, "step": 126350 }, { "epoch": 1.24, "grad_norm": 11.119826316833496, "learning_rate": 2.994586206896552e-06, "loss": 0.2522, "step": 126375 }, { "epoch": 1.24, "grad_norm": 4.142794609069824, "learning_rate": 2.9941551724137935e-06, "loss": 0.1641, "step": 126400 }, { "epoch": 1.24, "grad_norm": 4.370267868041992, "learning_rate": 2.993724137931035e-06, "loss": 0.2791, "step": 126425 }, { "epoch": 1.24, "grad_norm": 7.622269630432129, "learning_rate": 2.993293103448276e-06, "loss": 0.1226, "step": 126450 }, { "epoch": 1.24, "grad_norm": 5.095435619354248, "learning_rate": 2.9928620689655174e-06, "loss": 0.3139, "step": 126475 }, { "epoch": 1.24, "grad_norm": 9.483612060546875, "learning_rate": 2.992431034482759e-06, "loss": 0.1295, "step": 126500 }, { "epoch": 1.24, "grad_norm": 6.900043487548828, "learning_rate": 2.9920000000000003e-06, "loss": 0.2707, "step": 126525 }, { "epoch": 1.24, "grad_norm": 11.187432289123535, "learning_rate": 2.9915689655172414e-06, "loss": 0.1253, "step": 126550 }, { "epoch": 1.24, "grad_norm": 4.604911804199219, "learning_rate": 2.991137931034483e-06, "loss": 0.3285, "step": 126575 }, { "epoch": 1.25, "grad_norm": 8.642210960388184, "learning_rate": 2.9907068965517243e-06, "loss": 0.1557, "step": 126600 }, { "epoch": 1.25, "grad_norm": 5.206338882446289, "learning_rate": 2.9902758620689657e-06, "loss": 0.2777, "step": 126625 }, { "epoch": 1.25, "grad_norm": 5.653742790222168, "learning_rate": 2.9898448275862068e-06, "loss": 0.1397, "step": 126650 }, { "epoch": 1.25, "grad_norm": 4.065059661865234, "learning_rate": 2.9894137931034482e-06, "loss": 0.2825, "step": 126675 }, { "epoch": 1.25, "grad_norm": 4.304654121398926, "learning_rate": 2.9889827586206897e-06, "loss": 0.1388, "step": 126700 }, { "epoch": 1.25, "grad_norm": 3.7188360691070557, "learning_rate": 2.9885517241379316e-06, "loss": 0.2748, "step": 126725 }, { "epoch": 1.25, "grad_norm": 5.157789707183838, "learning_rate": 2.988120689655173e-06, "loss": 0.1713, "step": 126750 }, { "epoch": 1.25, "grad_norm": 8.52896499633789, "learning_rate": 2.9876896551724136e-06, "loss": 0.318, "step": 126775 }, { "epoch": 1.25, "grad_norm": 6.587066650390625, "learning_rate": 2.9872586206896555e-06, "loss": 0.1336, "step": 126800 }, { "epoch": 1.25, "grad_norm": 4.217599868774414, "learning_rate": 2.986827586206897e-06, "loss": 0.2528, "step": 126825 }, { "epoch": 1.25, "grad_norm": 8.465712547302246, "learning_rate": 2.9863965517241384e-06, "loss": 0.1491, "step": 126850 }, { "epoch": 1.25, "grad_norm": 4.16227388381958, "learning_rate": 2.9859655172413794e-06, "loss": 0.3068, "step": 126875 }, { "epoch": 1.25, "grad_norm": 8.91474437713623, "learning_rate": 2.985534482758621e-06, "loss": 0.15, "step": 126900 }, { "epoch": 1.25, "grad_norm": 4.221991539001465, "learning_rate": 2.9851034482758624e-06, "loss": 0.3089, "step": 126925 }, { "epoch": 1.25, "grad_norm": 10.06592082977295, "learning_rate": 2.984672413793104e-06, "loss": 0.1465, "step": 126950 }, { "epoch": 1.25, "grad_norm": 5.015525817871094, "learning_rate": 2.9842413793103453e-06, "loss": 0.235, "step": 126975 }, { "epoch": 1.25, "grad_norm": 11.486400604248047, "learning_rate": 2.9838103448275863e-06, "loss": 0.1493, "step": 127000 }, { "epoch": 1.25, "grad_norm": 4.76718282699585, "learning_rate": 2.9833793103448278e-06, "loss": 0.3347, "step": 127025 }, { "epoch": 1.25, "grad_norm": 11.37607479095459, "learning_rate": 2.9829482758620692e-06, "loss": 0.1583, "step": 127050 }, { "epoch": 1.25, "grad_norm": 3.9364430904388428, "learning_rate": 2.9825172413793107e-06, "loss": 0.3232, "step": 127075 }, { "epoch": 1.25, "grad_norm": 3.8245623111724854, "learning_rate": 2.9820862068965517e-06, "loss": 0.1254, "step": 127100 }, { "epoch": 1.25, "grad_norm": 3.5435380935668945, "learning_rate": 2.981655172413793e-06, "loss": 0.2545, "step": 127125 }, { "epoch": 1.25, "grad_norm": 6.796573638916016, "learning_rate": 2.9812241379310346e-06, "loss": 0.1477, "step": 127150 }, { "epoch": 1.25, "grad_norm": 3.9401025772094727, "learning_rate": 2.980793103448276e-06, "loss": 0.3206, "step": 127175 }, { "epoch": 1.25, "grad_norm": 5.60601806640625, "learning_rate": 2.9803620689655175e-06, "loss": 0.1003, "step": 127200 }, { "epoch": 1.25, "grad_norm": 3.74497389793396, "learning_rate": 2.9799310344827586e-06, "loss": 0.2818, "step": 127225 }, { "epoch": 1.25, "grad_norm": 9.512886047363281, "learning_rate": 2.9795e-06, "loss": 0.1393, "step": 127250 }, { "epoch": 1.25, "grad_norm": 6.197235107421875, "learning_rate": 2.9790689655172415e-06, "loss": 0.3609, "step": 127275 }, { "epoch": 1.25, "grad_norm": 13.281719207763672, "learning_rate": 2.9786379310344834e-06, "loss": 0.1474, "step": 127300 }, { "epoch": 1.25, "grad_norm": 5.212763786315918, "learning_rate": 2.978206896551724e-06, "loss": 0.2703, "step": 127325 }, { "epoch": 1.25, "grad_norm": 4.199653148651123, "learning_rate": 2.9777758620689654e-06, "loss": 0.1558, "step": 127350 }, { "epoch": 1.25, "grad_norm": 4.336798667907715, "learning_rate": 2.9773448275862073e-06, "loss": 0.3078, "step": 127375 }, { "epoch": 1.25, "grad_norm": 3.6930267810821533, "learning_rate": 2.9769137931034488e-06, "loss": 0.1486, "step": 127400 }, { "epoch": 1.25, "grad_norm": 3.9127228260040283, "learning_rate": 2.9764827586206902e-06, "loss": 0.2905, "step": 127425 }, { "epoch": 1.25, "grad_norm": 13.163771629333496, "learning_rate": 2.9760517241379312e-06, "loss": 0.1389, "step": 127450 }, { "epoch": 1.25, "grad_norm": 4.3756232261657715, "learning_rate": 2.9756206896551727e-06, "loss": 0.2573, "step": 127475 }, { "epoch": 1.25, "grad_norm": 6.491002082824707, "learning_rate": 2.975189655172414e-06, "loss": 0.1734, "step": 127500 }, { "epoch": 1.25, "grad_norm": 2.6277694702148438, "learning_rate": 2.9747586206896556e-06, "loss": 0.3041, "step": 127525 }, { "epoch": 1.25, "grad_norm": 8.461997032165527, "learning_rate": 2.9743275862068966e-06, "loss": 0.1348, "step": 127550 }, { "epoch": 1.25, "grad_norm": 5.419836044311523, "learning_rate": 2.973896551724138e-06, "loss": 0.326, "step": 127575 }, { "epoch": 1.25, "grad_norm": 8.392377853393555, "learning_rate": 2.9734655172413796e-06, "loss": 0.1405, "step": 127600 }, { "epoch": 1.26, "grad_norm": 4.874054431915283, "learning_rate": 2.973034482758621e-06, "loss": 0.2987, "step": 127625 }, { "epoch": 1.26, "grad_norm": 12.135010719299316, "learning_rate": 2.9726034482758625e-06, "loss": 0.1558, "step": 127650 }, { "epoch": 1.26, "grad_norm": 5.2677531242370605, "learning_rate": 2.9721724137931035e-06, "loss": 0.3526, "step": 127675 }, { "epoch": 1.26, "grad_norm": 8.33180046081543, "learning_rate": 2.971741379310345e-06, "loss": 0.1102, "step": 127700 }, { "epoch": 1.26, "grad_norm": 5.276817321777344, "learning_rate": 2.9713103448275864e-06, "loss": 0.3372, "step": 127725 }, { "epoch": 1.26, "grad_norm": 12.96988582611084, "learning_rate": 2.970879310344828e-06, "loss": 0.1503, "step": 127750 }, { "epoch": 1.26, "grad_norm": 4.899165153503418, "learning_rate": 2.970448275862069e-06, "loss": 0.273, "step": 127775 }, { "epoch": 1.26, "grad_norm": 6.293694496154785, "learning_rate": 2.9700172413793104e-06, "loss": 0.1209, "step": 127800 }, { "epoch": 1.26, "grad_norm": 3.3152408599853516, "learning_rate": 2.969586206896552e-06, "loss": 0.2748, "step": 127825 }, { "epoch": 1.26, "grad_norm": 5.641828536987305, "learning_rate": 2.9691551724137933e-06, "loss": 0.1378, "step": 127850 }, { "epoch": 1.26, "grad_norm": 4.185665607452393, "learning_rate": 2.968724137931035e-06, "loss": 0.2494, "step": 127875 }, { "epoch": 1.26, "grad_norm": 8.273247718811035, "learning_rate": 2.9682931034482758e-06, "loss": 0.1278, "step": 127900 }, { "epoch": 1.26, "grad_norm": 3.7332422733306885, "learning_rate": 2.9678620689655172e-06, "loss": 0.2927, "step": 127925 }, { "epoch": 1.26, "grad_norm": 7.200292587280273, "learning_rate": 2.967431034482759e-06, "loss": 0.1415, "step": 127950 }, { "epoch": 1.26, "grad_norm": 4.429804801940918, "learning_rate": 2.9670000000000006e-06, "loss": 0.3177, "step": 127975 }, { "epoch": 1.26, "grad_norm": 7.578754901885986, "learning_rate": 2.9665689655172416e-06, "loss": 0.1401, "step": 128000 }, { "epoch": 1.26, "grad_norm": 4.866497993469238, "learning_rate": 2.966137931034483e-06, "loss": 0.2812, "step": 128025 }, { "epoch": 1.26, "grad_norm": 7.854349136352539, "learning_rate": 2.9657068965517245e-06, "loss": 0.1789, "step": 128050 }, { "epoch": 1.26, "grad_norm": 6.356574058532715, "learning_rate": 2.965275862068966e-06, "loss": 0.2764, "step": 128075 }, { "epoch": 1.26, "grad_norm": 3.640443801879883, "learning_rate": 2.9648448275862074e-06, "loss": 0.1428, "step": 128100 }, { "epoch": 1.26, "grad_norm": 6.923694610595703, "learning_rate": 2.9644137931034485e-06, "loss": 0.3322, "step": 128125 }, { "epoch": 1.26, "grad_norm": 9.732985496520996, "learning_rate": 2.96398275862069e-06, "loss": 0.1303, "step": 128150 }, { "epoch": 1.26, "grad_norm": 5.061055660247803, "learning_rate": 2.9635517241379314e-06, "loss": 0.303, "step": 128175 }, { "epoch": 1.26, "grad_norm": 17.826576232910156, "learning_rate": 2.963120689655173e-06, "loss": 0.1393, "step": 128200 }, { "epoch": 1.26, "grad_norm": 4.278364181518555, "learning_rate": 2.962689655172414e-06, "loss": 0.2569, "step": 128225 }, { "epoch": 1.26, "grad_norm": 12.361920356750488, "learning_rate": 2.9622586206896553e-06, "loss": 0.1501, "step": 128250 }, { "epoch": 1.26, "grad_norm": 3.164515972137451, "learning_rate": 2.9618275862068968e-06, "loss": 0.251, "step": 128275 }, { "epoch": 1.26, "grad_norm": 13.190546035766602, "learning_rate": 2.9613965517241382e-06, "loss": 0.1491, "step": 128300 }, { "epoch": 1.26, "grad_norm": 5.174039840698242, "learning_rate": 2.9609655172413797e-06, "loss": 0.2837, "step": 128325 }, { "epoch": 1.26, "grad_norm": 8.77861213684082, "learning_rate": 2.9605344827586207e-06, "loss": 0.1404, "step": 128350 }, { "epoch": 1.26, "grad_norm": 4.460209369659424, "learning_rate": 2.960103448275862e-06, "loss": 0.2872, "step": 128375 }, { "epoch": 1.26, "grad_norm": 7.272961139678955, "learning_rate": 2.9596724137931036e-06, "loss": 0.1602, "step": 128400 }, { "epoch": 1.26, "grad_norm": 4.884068012237549, "learning_rate": 2.959241379310345e-06, "loss": 0.2928, "step": 128425 }, { "epoch": 1.26, "grad_norm": 6.150003910064697, "learning_rate": 2.958810344827586e-06, "loss": 0.1298, "step": 128450 }, { "epoch": 1.26, "grad_norm": 4.501097202301025, "learning_rate": 2.9583793103448276e-06, "loss": 0.275, "step": 128475 }, { "epoch": 1.26, "grad_norm": 7.512777328491211, "learning_rate": 2.957948275862069e-06, "loss": 0.1542, "step": 128500 }, { "epoch": 1.26, "grad_norm": 3.7602627277374268, "learning_rate": 2.957534482758621e-06, "loss": 0.2486, "step": 128525 }, { "epoch": 1.26, "grad_norm": 8.06043529510498, "learning_rate": 2.957103448275862e-06, "loss": 0.155, "step": 128550 }, { "epoch": 1.26, "grad_norm": 4.751469612121582, "learning_rate": 2.9566724137931034e-06, "loss": 0.2791, "step": 128575 }, { "epoch": 1.26, "grad_norm": 5.389976978302002, "learning_rate": 2.956241379310345e-06, "loss": 0.1931, "step": 128600 }, { "epoch": 1.27, "grad_norm": 4.668659210205078, "learning_rate": 2.9558103448275867e-06, "loss": 0.2961, "step": 128625 }, { "epoch": 1.27, "grad_norm": 15.729554176330566, "learning_rate": 2.955379310344828e-06, "loss": 0.1706, "step": 128650 }, { "epoch": 1.27, "grad_norm": 3.9298288822174072, "learning_rate": 2.954948275862069e-06, "loss": 0.3439, "step": 128675 }, { "epoch": 1.27, "grad_norm": 11.452493667602539, "learning_rate": 2.9545172413793107e-06, "loss": 0.1433, "step": 128700 }, { "epoch": 1.27, "grad_norm": 3.179719924926758, "learning_rate": 2.954086206896552e-06, "loss": 0.3501, "step": 128725 }, { "epoch": 1.27, "grad_norm": 14.261443138122559, "learning_rate": 2.9536551724137936e-06, "loss": 0.1336, "step": 128750 }, { "epoch": 1.27, "grad_norm": 8.213741302490234, "learning_rate": 2.9532241379310346e-06, "loss": 0.3074, "step": 128775 }, { "epoch": 1.27, "grad_norm": 6.025684833526611, "learning_rate": 2.952793103448276e-06, "loss": 0.1361, "step": 128800 }, { "epoch": 1.27, "grad_norm": 5.308666706085205, "learning_rate": 2.9523620689655175e-06, "loss": 0.304, "step": 128825 }, { "epoch": 1.27, "grad_norm": 11.077136993408203, "learning_rate": 2.951931034482759e-06, "loss": 0.182, "step": 128850 }, { "epoch": 1.27, "grad_norm": 4.262418746948242, "learning_rate": 2.9515000000000005e-06, "loss": 0.2706, "step": 128875 }, { "epoch": 1.27, "grad_norm": 6.229832172393799, "learning_rate": 2.9510689655172415e-06, "loss": 0.1413, "step": 128900 }, { "epoch": 1.27, "grad_norm": 19.83920669555664, "learning_rate": 2.950637931034483e-06, "loss": 0.2572, "step": 128925 }, { "epoch": 1.27, "grad_norm": 7.288382530212402, "learning_rate": 2.9502068965517244e-06, "loss": 0.1414, "step": 128950 }, { "epoch": 1.27, "grad_norm": 4.045971870422363, "learning_rate": 2.949775862068966e-06, "loss": 0.2619, "step": 128975 }, { "epoch": 1.27, "grad_norm": 15.373640060424805, "learning_rate": 2.949344827586207e-06, "loss": 0.1468, "step": 129000 }, { "epoch": 1.27, "grad_norm": 5.036344528198242, "learning_rate": 2.9489137931034483e-06, "loss": 0.2753, "step": 129025 }, { "epoch": 1.27, "grad_norm": 6.185795307159424, "learning_rate": 2.94848275862069e-06, "loss": 0.1764, "step": 129050 }, { "epoch": 1.27, "grad_norm": 4.06866979598999, "learning_rate": 2.9480517241379313e-06, "loss": 0.265, "step": 129075 }, { "epoch": 1.27, "grad_norm": 10.370000839233398, "learning_rate": 2.9476206896551727e-06, "loss": 0.159, "step": 129100 }, { "epoch": 1.27, "grad_norm": 4.046319007873535, "learning_rate": 2.9471896551724137e-06, "loss": 0.2687, "step": 129125 }, { "epoch": 1.27, "grad_norm": 6.559055328369141, "learning_rate": 2.946758620689655e-06, "loss": 0.1504, "step": 129150 }, { "epoch": 1.27, "grad_norm": 5.510786533355713, "learning_rate": 2.9463275862068967e-06, "loss": 0.2759, "step": 129175 }, { "epoch": 1.27, "grad_norm": 10.097411155700684, "learning_rate": 2.9458965517241385e-06, "loss": 0.1626, "step": 129200 }, { "epoch": 1.27, "grad_norm": 5.529594421386719, "learning_rate": 2.945465517241379e-06, "loss": 0.2813, "step": 129225 }, { "epoch": 1.27, "grad_norm": 8.813091278076172, "learning_rate": 2.9450344827586206e-06, "loss": 0.1699, "step": 129250 }, { "epoch": 1.27, "grad_norm": 3.438734769821167, "learning_rate": 2.9446034482758625e-06, "loss": 0.2659, "step": 129275 }, { "epoch": 1.27, "grad_norm": 8.740754127502441, "learning_rate": 2.944172413793104e-06, "loss": 0.1521, "step": 129300 }, { "epoch": 1.27, "grad_norm": 4.331121921539307, "learning_rate": 2.9437413793103454e-06, "loss": 0.2381, "step": 129325 }, { "epoch": 1.27, "grad_norm": 15.061365127563477, "learning_rate": 2.9433103448275864e-06, "loss": 0.1531, "step": 129350 }, { "epoch": 1.27, "grad_norm": 4.218088150024414, "learning_rate": 2.942879310344828e-06, "loss": 0.2942, "step": 129375 }, { "epoch": 1.27, "grad_norm": 10.744329452514648, "learning_rate": 2.9424482758620693e-06, "loss": 0.162, "step": 129400 }, { "epoch": 1.27, "grad_norm": 4.834748268127441, "learning_rate": 2.942017241379311e-06, "loss": 0.3092, "step": 129425 }, { "epoch": 1.27, "grad_norm": 10.887154579162598, "learning_rate": 2.941586206896552e-06, "loss": 0.1445, "step": 129450 }, { "epoch": 1.27, "grad_norm": 4.692255020141602, "learning_rate": 2.9411551724137933e-06, "loss": 0.2885, "step": 129475 }, { "epoch": 1.27, "grad_norm": 10.418717384338379, "learning_rate": 2.9407241379310347e-06, "loss": 0.1569, "step": 129500 }, { "epoch": 1.27, "grad_norm": 3.491201639175415, "learning_rate": 2.940293103448276e-06, "loss": 0.2718, "step": 129525 }, { "epoch": 1.27, "grad_norm": 7.673471927642822, "learning_rate": 2.9398620689655177e-06, "loss": 0.1269, "step": 129550 }, { "epoch": 1.27, "grad_norm": 4.455288410186768, "learning_rate": 2.9394310344827587e-06, "loss": 0.2305, "step": 129575 }, { "epoch": 1.27, "grad_norm": 4.18178129196167, "learning_rate": 2.939e-06, "loss": 0.1211, "step": 129600 }, { "epoch": 1.27, "grad_norm": 4.6959943771362305, "learning_rate": 2.9385689655172416e-06, "loss": 0.3023, "step": 129625 }, { "epoch": 1.28, "grad_norm": 4.321465015411377, "learning_rate": 2.938137931034483e-06, "loss": 0.1404, "step": 129650 }, { "epoch": 1.28, "grad_norm": 4.650800704956055, "learning_rate": 2.937706896551724e-06, "loss": 0.2795, "step": 129675 }, { "epoch": 1.28, "grad_norm": 11.632253646850586, "learning_rate": 2.9372758620689656e-06, "loss": 0.1283, "step": 129700 }, { "epoch": 1.28, "grad_norm": 4.687709808349609, "learning_rate": 2.936844827586207e-06, "loss": 0.2986, "step": 129725 }, { "epoch": 1.28, "grad_norm": 22.352487564086914, "learning_rate": 2.9364137931034485e-06, "loss": 0.1607, "step": 129750 }, { "epoch": 1.28, "grad_norm": 3.2081925868988037, "learning_rate": 2.9359827586206903e-06, "loss": 0.2596, "step": 129775 }, { "epoch": 1.28, "grad_norm": 8.25133228302002, "learning_rate": 2.935551724137931e-06, "loss": 0.1306, "step": 129800 }, { "epoch": 1.28, "grad_norm": 4.140007972717285, "learning_rate": 2.9351206896551724e-06, "loss": 0.3526, "step": 129825 }, { "epoch": 1.28, "grad_norm": 4.087214469909668, "learning_rate": 2.9346896551724143e-06, "loss": 0.1508, "step": 129850 }, { "epoch": 1.28, "grad_norm": 5.410165309906006, "learning_rate": 2.9342586206896557e-06, "loss": 0.293, "step": 129875 }, { "epoch": 1.28, "grad_norm": 8.250146865844727, "learning_rate": 2.9338275862068964e-06, "loss": 0.1452, "step": 129900 }, { "epoch": 1.28, "grad_norm": 5.262845516204834, "learning_rate": 2.9333965517241382e-06, "loss": 0.293, "step": 129925 }, { "epoch": 1.28, "grad_norm": 6.927124977111816, "learning_rate": 2.9329655172413797e-06, "loss": 0.138, "step": 129950 }, { "epoch": 1.28, "grad_norm": 3.10107159614563, "learning_rate": 2.932534482758621e-06, "loss": 0.3036, "step": 129975 }, { "epoch": 1.28, "grad_norm": 6.227792739868164, "learning_rate": 2.9321034482758626e-06, "loss": 0.1231, "step": 130000 }, { "epoch": 1.28, "grad_norm": 4.816910743713379, "learning_rate": 2.9316724137931036e-06, "loss": 0.3759, "step": 130025 }, { "epoch": 1.28, "grad_norm": 10.218239784240723, "learning_rate": 2.931241379310345e-06, "loss": 0.1318, "step": 130050 }, { "epoch": 1.28, "grad_norm": 2.9970576763153076, "learning_rate": 2.9308103448275866e-06, "loss": 0.2836, "step": 130075 }, { "epoch": 1.28, "grad_norm": 8.236226081848145, "learning_rate": 2.930379310344828e-06, "loss": 0.1596, "step": 130100 }, { "epoch": 1.28, "grad_norm": 5.449472904205322, "learning_rate": 2.929948275862069e-06, "loss": 0.2797, "step": 130125 }, { "epoch": 1.28, "grad_norm": 9.283870697021484, "learning_rate": 2.9295172413793105e-06, "loss": 0.1541, "step": 130150 }, { "epoch": 1.28, "grad_norm": 5.826918601989746, "learning_rate": 2.929086206896552e-06, "loss": 0.3139, "step": 130175 }, { "epoch": 1.28, "grad_norm": 12.128264427185059, "learning_rate": 2.9286551724137934e-06, "loss": 0.1458, "step": 130200 }, { "epoch": 1.28, "grad_norm": 3.87733793258667, "learning_rate": 2.928224137931035e-06, "loss": 0.3452, "step": 130225 }, { "epoch": 1.28, "grad_norm": 10.659512519836426, "learning_rate": 2.927793103448276e-06, "loss": 0.1414, "step": 130250 }, { "epoch": 1.28, "grad_norm": 4.464568614959717, "learning_rate": 2.9273620689655174e-06, "loss": 0.2754, "step": 130275 }, { "epoch": 1.28, "grad_norm": 9.748737335205078, "learning_rate": 2.926931034482759e-06, "loss": 0.1353, "step": 130300 }, { "epoch": 1.28, "grad_norm": 3.8342738151550293, "learning_rate": 2.9265000000000003e-06, "loss": 0.2909, "step": 130325 }, { "epoch": 1.28, "grad_norm": 7.490649700164795, "learning_rate": 2.9260689655172413e-06, "loss": 0.1394, "step": 130350 }, { "epoch": 1.28, "grad_norm": 4.017355442047119, "learning_rate": 2.9256379310344828e-06, "loss": 0.264, "step": 130375 }, { "epoch": 1.28, "grad_norm": 6.154755115509033, "learning_rate": 2.9252068965517242e-06, "loss": 0.1271, "step": 130400 }, { "epoch": 1.28, "grad_norm": 4.357958793640137, "learning_rate": 2.924775862068966e-06, "loss": 0.2294, "step": 130425 }, { "epoch": 1.28, "grad_norm": 3.7800559997558594, "learning_rate": 2.9243448275862076e-06, "loss": 0.1245, "step": 130450 }, { "epoch": 1.28, "grad_norm": 4.383526802062988, "learning_rate": 2.923913793103448e-06, "loss": 0.2617, "step": 130475 }, { "epoch": 1.28, "grad_norm": 11.535680770874023, "learning_rate": 2.92348275862069e-06, "loss": 0.147, "step": 130500 }, { "epoch": 1.28, "grad_norm": 6.731508255004883, "learning_rate": 2.9230517241379315e-06, "loss": 0.2885, "step": 130525 }, { "epoch": 1.28, "grad_norm": 6.324944019317627, "learning_rate": 2.922620689655173e-06, "loss": 0.1647, "step": 130550 }, { "epoch": 1.28, "grad_norm": 3.702488422393799, "learning_rate": 2.922189655172414e-06, "loss": 0.2983, "step": 130575 }, { "epoch": 1.28, "grad_norm": 3.6747264862060547, "learning_rate": 2.9217586206896554e-06, "loss": 0.1694, "step": 130600 }, { "epoch": 1.28, "grad_norm": 3.508518695831299, "learning_rate": 2.9213448275862073e-06, "loss": 0.2819, "step": 130625 }, { "epoch": 1.28, "grad_norm": 10.01076602935791, "learning_rate": 2.9209137931034488e-06, "loss": 0.1408, "step": 130650 }, { "epoch": 1.29, "grad_norm": 10.803929328918457, "learning_rate": 2.92048275862069e-06, "loss": 0.2806, "step": 130675 }, { "epoch": 1.29, "grad_norm": 6.725844860076904, "learning_rate": 2.9200517241379313e-06, "loss": 0.1472, "step": 130700 }, { "epoch": 1.29, "grad_norm": 4.731884479522705, "learning_rate": 2.9196206896551727e-06, "loss": 0.2828, "step": 130725 }, { "epoch": 1.29, "grad_norm": 8.972899436950684, "learning_rate": 2.919189655172414e-06, "loss": 0.1104, "step": 130750 }, { "epoch": 1.29, "grad_norm": 3.7217631340026855, "learning_rate": 2.9187586206896556e-06, "loss": 0.2937, "step": 130775 }, { "epoch": 1.29, "grad_norm": 10.005192756652832, "learning_rate": 2.9183275862068967e-06, "loss": 0.1734, "step": 130800 }, { "epoch": 1.29, "grad_norm": 5.158206939697266, "learning_rate": 2.917896551724138e-06, "loss": 0.291, "step": 130825 }, { "epoch": 1.29, "grad_norm": 7.375265121459961, "learning_rate": 2.9174655172413796e-06, "loss": 0.1435, "step": 130850 }, { "epoch": 1.29, "grad_norm": 5.192901611328125, "learning_rate": 2.917034482758621e-06, "loss": 0.2986, "step": 130875 }, { "epoch": 1.29, "grad_norm": 7.944281101226807, "learning_rate": 2.916603448275862e-06, "loss": 0.16, "step": 130900 }, { "epoch": 1.29, "grad_norm": 4.540588855743408, "learning_rate": 2.9161724137931035e-06, "loss": 0.2781, "step": 130925 }, { "epoch": 1.29, "grad_norm": 12.859493255615234, "learning_rate": 2.915741379310345e-06, "loss": 0.1603, "step": 130950 }, { "epoch": 1.29, "grad_norm": 3.33768630027771, "learning_rate": 2.9153103448275864e-06, "loss": 0.3401, "step": 130975 }, { "epoch": 1.29, "grad_norm": 8.630828857421875, "learning_rate": 2.914879310344828e-06, "loss": 0.1669, "step": 131000 }, { "epoch": 1.29, "grad_norm": 4.278383731842041, "learning_rate": 2.914448275862069e-06, "loss": 0.3004, "step": 131025 }, { "epoch": 1.29, "grad_norm": 10.032734870910645, "learning_rate": 2.9140172413793104e-06, "loss": 0.1304, "step": 131050 }, { "epoch": 1.29, "grad_norm": 6.099024772644043, "learning_rate": 2.913586206896552e-06, "loss": 0.3148, "step": 131075 }, { "epoch": 1.29, "grad_norm": 7.655325412750244, "learning_rate": 2.9131551724137937e-06, "loss": 0.1573, "step": 131100 }, { "epoch": 1.29, "grad_norm": 4.656228542327881, "learning_rate": 2.9127241379310343e-06, "loss": 0.2745, "step": 131125 }, { "epoch": 1.29, "grad_norm": 5.732217311859131, "learning_rate": 2.912293103448276e-06, "loss": 0.1493, "step": 131150 }, { "epoch": 1.29, "grad_norm": 4.536261558532715, "learning_rate": 2.9118620689655177e-06, "loss": 0.308, "step": 131175 }, { "epoch": 1.29, "grad_norm": 10.173456192016602, "learning_rate": 2.911431034482759e-06, "loss": 0.1346, "step": 131200 }, { "epoch": 1.29, "grad_norm": 5.01147985458374, "learning_rate": 2.9110000000000006e-06, "loss": 0.2566, "step": 131225 }, { "epoch": 1.29, "grad_norm": 5.04200553894043, "learning_rate": 2.9105689655172416e-06, "loss": 0.1433, "step": 131250 }, { "epoch": 1.29, "grad_norm": 4.199446678161621, "learning_rate": 2.910137931034483e-06, "loss": 0.2607, "step": 131275 }, { "epoch": 1.29, "grad_norm": 9.277849197387695, "learning_rate": 2.9097068965517245e-06, "loss": 0.1232, "step": 131300 }, { "epoch": 1.29, "grad_norm": 3.4611284732818604, "learning_rate": 2.909275862068966e-06, "loss": 0.293, "step": 131325 }, { "epoch": 1.29, "grad_norm": 8.909700393676758, "learning_rate": 2.908844827586207e-06, "loss": 0.1568, "step": 131350 }, { "epoch": 1.29, "grad_norm": 6.870661735534668, "learning_rate": 2.9084137931034485e-06, "loss": 0.3638, "step": 131375 }, { "epoch": 1.29, "grad_norm": 10.38987922668457, "learning_rate": 2.90798275862069e-06, "loss": 0.133, "step": 131400 }, { "epoch": 1.29, "grad_norm": 5.909841060638428, "learning_rate": 2.9075517241379314e-06, "loss": 0.2442, "step": 131425 }, { "epoch": 1.29, "grad_norm": 8.12863826751709, "learning_rate": 2.907120689655173e-06, "loss": 0.132, "step": 131450 }, { "epoch": 1.29, "grad_norm": 3.6753342151641846, "learning_rate": 2.906689655172414e-06, "loss": 0.3089, "step": 131475 }, { "epoch": 1.29, "grad_norm": 6.58772087097168, "learning_rate": 2.9062586206896553e-06, "loss": 0.1772, "step": 131500 }, { "epoch": 1.29, "grad_norm": 5.252254009246826, "learning_rate": 2.905827586206897e-06, "loss": 0.2772, "step": 131525 }, { "epoch": 1.29, "grad_norm": 7.336836814880371, "learning_rate": 2.9053965517241383e-06, "loss": 0.1851, "step": 131550 }, { "epoch": 1.29, "grad_norm": 4.433798313140869, "learning_rate": 2.9049655172413793e-06, "loss": 0.2694, "step": 131575 }, { "epoch": 1.29, "grad_norm": 9.478620529174805, "learning_rate": 2.9045344827586207e-06, "loss": 0.1574, "step": 131600 }, { "epoch": 1.29, "grad_norm": 4.101605415344238, "learning_rate": 2.904103448275862e-06, "loss": 0.3313, "step": 131625 }, { "epoch": 1.29, "grad_norm": 7.234457492828369, "learning_rate": 2.9036724137931037e-06, "loss": 0.153, "step": 131650 }, { "epoch": 1.3, "grad_norm": 4.0579142570495605, "learning_rate": 2.9032413793103455e-06, "loss": 0.3398, "step": 131675 }, { "epoch": 1.3, "grad_norm": 7.373812675476074, "learning_rate": 2.902810344827586e-06, "loss": 0.1234, "step": 131700 }, { "epoch": 1.3, "grad_norm": 7.135587215423584, "learning_rate": 2.9023793103448276e-06, "loss": 0.3144, "step": 131725 }, { "epoch": 1.3, "grad_norm": 8.953351020812988, "learning_rate": 2.9019482758620695e-06, "loss": 0.1684, "step": 131750 }, { "epoch": 1.3, "grad_norm": 3.77937388420105, "learning_rate": 2.901517241379311e-06, "loss": 0.2725, "step": 131775 }, { "epoch": 1.3, "grad_norm": 10.590433120727539, "learning_rate": 2.9010862068965515e-06, "loss": 0.1286, "step": 131800 }, { "epoch": 1.3, "grad_norm": 3.575004816055298, "learning_rate": 2.9006551724137934e-06, "loss": 0.2861, "step": 131825 }, { "epoch": 1.3, "grad_norm": 8.272866249084473, "learning_rate": 2.900224137931035e-06, "loss": 0.1349, "step": 131850 }, { "epoch": 1.3, "grad_norm": 3.513216018676758, "learning_rate": 2.8997931034482763e-06, "loss": 0.2655, "step": 131875 }, { "epoch": 1.3, "grad_norm": 7.484994411468506, "learning_rate": 2.899362068965518e-06, "loss": 0.1282, "step": 131900 }, { "epoch": 1.3, "grad_norm": 4.439557075500488, "learning_rate": 2.898931034482759e-06, "loss": 0.2588, "step": 131925 }, { "epoch": 1.3, "grad_norm": 12.311460494995117, "learning_rate": 2.8985000000000003e-06, "loss": 0.173, "step": 131950 }, { "epoch": 1.3, "grad_norm": 3.516361713409424, "learning_rate": 2.8980689655172417e-06, "loss": 0.3027, "step": 131975 }, { "epoch": 1.3, "grad_norm": 25.13035774230957, "learning_rate": 2.897637931034483e-06, "loss": 0.1855, "step": 132000 }, { "epoch": 1.3, "grad_norm": 2.7994894981384277, "learning_rate": 2.8972068965517242e-06, "loss": 0.2906, "step": 132025 }, { "epoch": 1.3, "grad_norm": 11.588996887207031, "learning_rate": 2.8967758620689657e-06, "loss": 0.1433, "step": 132050 }, { "epoch": 1.3, "grad_norm": 6.353439807891846, "learning_rate": 2.896344827586207e-06, "loss": 0.2588, "step": 132075 }, { "epoch": 1.3, "grad_norm": 12.117056846618652, "learning_rate": 2.8959137931034486e-06, "loss": 0.1645, "step": 132100 }, { "epoch": 1.3, "grad_norm": 4.339816570281982, "learning_rate": 2.8954827586206896e-06, "loss": 0.2502, "step": 132125 }, { "epoch": 1.3, "grad_norm": 12.39195728302002, "learning_rate": 2.895051724137931e-06, "loss": 0.1483, "step": 132150 }, { "epoch": 1.3, "grad_norm": 7.749324798583984, "learning_rate": 2.8946206896551725e-06, "loss": 0.3078, "step": 132175 }, { "epoch": 1.3, "grad_norm": 5.093759059906006, "learning_rate": 2.894189655172414e-06, "loss": 0.1422, "step": 132200 }, { "epoch": 1.3, "grad_norm": 3.8947110176086426, "learning_rate": 2.8937586206896555e-06, "loss": 0.3203, "step": 132225 }, { "epoch": 1.3, "grad_norm": 6.082670211791992, "learning_rate": 2.8933275862068965e-06, "loss": 0.1325, "step": 132250 }, { "epoch": 1.3, "grad_norm": 5.057648658752441, "learning_rate": 2.892896551724138e-06, "loss": 0.2909, "step": 132275 }, { "epoch": 1.3, "grad_norm": 4.759195327758789, "learning_rate": 2.8924655172413794e-06, "loss": 0.1522, "step": 132300 }, { "epoch": 1.3, "grad_norm": 4.967407703399658, "learning_rate": 2.8920344827586213e-06, "loss": 0.3095, "step": 132325 }, { "epoch": 1.3, "grad_norm": 10.389107704162598, "learning_rate": 2.891603448275862e-06, "loss": 0.1304, "step": 132350 }, { "epoch": 1.3, "grad_norm": 5.494095325469971, "learning_rate": 2.8911724137931034e-06, "loss": 0.2833, "step": 132375 }, { "epoch": 1.3, "grad_norm": 14.410687446594238, "learning_rate": 2.8907413793103452e-06, "loss": 0.1865, "step": 132400 }, { "epoch": 1.3, "grad_norm": 3.849229574203491, "learning_rate": 2.8903103448275867e-06, "loss": 0.2974, "step": 132425 }, { "epoch": 1.3, "grad_norm": 7.4761528968811035, "learning_rate": 2.889879310344828e-06, "loss": 0.1591, "step": 132450 }, { "epoch": 1.3, "grad_norm": 5.984861373901367, "learning_rate": 2.889448275862069e-06, "loss": 0.2577, "step": 132475 }, { "epoch": 1.3, "grad_norm": 12.181112289428711, "learning_rate": 2.8890172413793106e-06, "loss": 0.1442, "step": 132500 }, { "epoch": 1.3, "grad_norm": 5.805852890014648, "learning_rate": 2.888586206896552e-06, "loss": 0.2325, "step": 132525 }, { "epoch": 1.3, "grad_norm": 8.679492950439453, "learning_rate": 2.8881551724137935e-06, "loss": 0.153, "step": 132550 }, { "epoch": 1.3, "grad_norm": 3.444734573364258, "learning_rate": 2.8877241379310346e-06, "loss": 0.2668, "step": 132575 }, { "epoch": 1.3, "grad_norm": 20.10346221923828, "learning_rate": 2.887293103448276e-06, "loss": 0.1602, "step": 132600 }, { "epoch": 1.3, "grad_norm": 6.367146968841553, "learning_rate": 2.8868620689655175e-06, "loss": 0.2616, "step": 132625 }, { "epoch": 1.3, "grad_norm": 4.134946346282959, "learning_rate": 2.886431034482759e-06, "loss": 0.13, "step": 132650 }, { "epoch": 1.3, "grad_norm": 4.844989776611328, "learning_rate": 2.8860000000000004e-06, "loss": 0.2873, "step": 132675 }, { "epoch": 1.31, "grad_norm": 7.880080223083496, "learning_rate": 2.8855689655172414e-06, "loss": 0.1756, "step": 132700 }, { "epoch": 1.31, "grad_norm": 5.033351421356201, "learning_rate": 2.885137931034483e-06, "loss": 0.2872, "step": 132725 }, { "epoch": 1.31, "grad_norm": 11.071396827697754, "learning_rate": 2.8847068965517244e-06, "loss": 0.1511, "step": 132750 }, { "epoch": 1.31, "grad_norm": 5.437605381011963, "learning_rate": 2.8842931034482762e-06, "loss": 0.2987, "step": 132775 }, { "epoch": 1.31, "grad_norm": 5.736367702484131, "learning_rate": 2.8838620689655173e-06, "loss": 0.1432, "step": 132800 }, { "epoch": 1.31, "grad_norm": 3.7764244079589844, "learning_rate": 2.8834310344827587e-06, "loss": 0.2618, "step": 132825 }, { "epoch": 1.31, "grad_norm": 18.952829360961914, "learning_rate": 2.883e-06, "loss": 0.1615, "step": 132850 }, { "epoch": 1.31, "grad_norm": 3.615722894668579, "learning_rate": 2.8825689655172416e-06, "loss": 0.2795, "step": 132875 }, { "epoch": 1.31, "grad_norm": 8.742515563964844, "learning_rate": 2.882137931034483e-06, "loss": 0.157, "step": 132900 }, { "epoch": 1.31, "grad_norm": 4.560403823852539, "learning_rate": 2.881706896551724e-06, "loss": 0.31, "step": 132925 }, { "epoch": 1.31, "grad_norm": 10.377070426940918, "learning_rate": 2.8812758620689656e-06, "loss": 0.1688, "step": 132950 }, { "epoch": 1.31, "grad_norm": 3.687265634536743, "learning_rate": 2.880844827586207e-06, "loss": 0.2513, "step": 132975 }, { "epoch": 1.31, "grad_norm": 10.470468521118164, "learning_rate": 2.880413793103449e-06, "loss": 0.149, "step": 133000 }, { "epoch": 1.31, "grad_norm": 3.8835325241088867, "learning_rate": 2.8799827586206895e-06, "loss": 0.2568, "step": 133025 }, { "epoch": 1.31, "grad_norm": 9.790465354919434, "learning_rate": 2.879551724137931e-06, "loss": 0.1602, "step": 133050 }, { "epoch": 1.31, "grad_norm": 5.201123237609863, "learning_rate": 2.879120689655173e-06, "loss": 0.2367, "step": 133075 }, { "epoch": 1.31, "grad_norm": 9.934571266174316, "learning_rate": 2.8786896551724143e-06, "loss": 0.137, "step": 133100 }, { "epoch": 1.31, "grad_norm": 7.059736251831055, "learning_rate": 2.8782586206896558e-06, "loss": 0.2707, "step": 133125 }, { "epoch": 1.31, "grad_norm": 9.252767562866211, "learning_rate": 2.877827586206897e-06, "loss": 0.1041, "step": 133150 }, { "epoch": 1.31, "grad_norm": 5.178316593170166, "learning_rate": 2.8773965517241383e-06, "loss": 0.301, "step": 133175 }, { "epoch": 1.31, "grad_norm": 9.415428161621094, "learning_rate": 2.8769655172413797e-06, "loss": 0.1468, "step": 133200 }, { "epoch": 1.31, "grad_norm": 5.086308479309082, "learning_rate": 2.876534482758621e-06, "loss": 0.3065, "step": 133225 }, { "epoch": 1.31, "grad_norm": 14.839950561523438, "learning_rate": 2.8761034482758622e-06, "loss": 0.1475, "step": 133250 }, { "epoch": 1.31, "grad_norm": 5.556472301483154, "learning_rate": 2.8756724137931037e-06, "loss": 0.2779, "step": 133275 }, { "epoch": 1.31, "grad_norm": 8.223381042480469, "learning_rate": 2.875241379310345e-06, "loss": 0.1626, "step": 133300 }, { "epoch": 1.31, "grad_norm": 4.35391902923584, "learning_rate": 2.8748103448275866e-06, "loss": 0.3224, "step": 133325 }, { "epoch": 1.31, "grad_norm": 7.5912370681762695, "learning_rate": 2.874379310344828e-06, "loss": 0.1412, "step": 133350 }, { "epoch": 1.31, "grad_norm": 5.298002243041992, "learning_rate": 2.873948275862069e-06, "loss": 0.2995, "step": 133375 }, { "epoch": 1.31, "grad_norm": 13.964186668395996, "learning_rate": 2.8735172413793105e-06, "loss": 0.1371, "step": 133400 }, { "epoch": 1.31, "grad_norm": 5.3196940422058105, "learning_rate": 2.873086206896552e-06, "loss": 0.3008, "step": 133425 }, { "epoch": 1.31, "grad_norm": 6.032919883728027, "learning_rate": 2.8726551724137934e-06, "loss": 0.121, "step": 133450 }, { "epoch": 1.31, "grad_norm": 4.271698474884033, "learning_rate": 2.8722241379310345e-06, "loss": 0.243, "step": 133475 }, { "epoch": 1.31, "grad_norm": 6.070394992828369, "learning_rate": 2.871793103448276e-06, "loss": 0.169, "step": 133500 }, { "epoch": 1.31, "grad_norm": 4.94100284576416, "learning_rate": 2.8713620689655174e-06, "loss": 0.2615, "step": 133525 }, { "epoch": 1.31, "grad_norm": 7.678559303283691, "learning_rate": 2.870931034482759e-06, "loss": 0.148, "step": 133550 }, { "epoch": 1.31, "grad_norm": 4.262653350830078, "learning_rate": 2.8705000000000003e-06, "loss": 0.261, "step": 133575 }, { "epoch": 1.31, "grad_norm": 8.991642951965332, "learning_rate": 2.8700689655172413e-06, "loss": 0.1516, "step": 133600 }, { "epoch": 1.31, "grad_norm": 4.229039192199707, "learning_rate": 2.869637931034483e-06, "loss": 0.3292, "step": 133625 }, { "epoch": 1.31, "grad_norm": 10.346205711364746, "learning_rate": 2.8692068965517247e-06, "loss": 0.1843, "step": 133650 }, { "epoch": 1.31, "grad_norm": 4.426191329956055, "learning_rate": 2.868775862068966e-06, "loss": 0.2916, "step": 133675 }, { "epoch": 1.31, "grad_norm": 12.154417037963867, "learning_rate": 2.8683448275862067e-06, "loss": 0.1596, "step": 133700 }, { "epoch": 1.32, "grad_norm": 6.237637519836426, "learning_rate": 2.8679137931034486e-06, "loss": 0.2585, "step": 133725 }, { "epoch": 1.32, "grad_norm": 7.594276428222656, "learning_rate": 2.86748275862069e-06, "loss": 0.1364, "step": 133750 }, { "epoch": 1.32, "grad_norm": 3.343532085418701, "learning_rate": 2.8670517241379315e-06, "loss": 0.2604, "step": 133775 }, { "epoch": 1.32, "grad_norm": 4.67933988571167, "learning_rate": 2.8666206896551726e-06, "loss": 0.1249, "step": 133800 }, { "epoch": 1.32, "grad_norm": 5.494527816772461, "learning_rate": 2.866189655172414e-06, "loss": 0.2998, "step": 133825 }, { "epoch": 1.32, "grad_norm": 7.520778656005859, "learning_rate": 2.8657586206896555e-06, "loss": 0.1605, "step": 133850 }, { "epoch": 1.32, "grad_norm": 5.825536251068115, "learning_rate": 2.865327586206897e-06, "loss": 0.2932, "step": 133875 }, { "epoch": 1.32, "grad_norm": 4.542388439178467, "learning_rate": 2.8648965517241384e-06, "loss": 0.149, "step": 133900 }, { "epoch": 1.32, "grad_norm": 5.952378273010254, "learning_rate": 2.8644655172413794e-06, "loss": 0.2583, "step": 133925 }, { "epoch": 1.32, "grad_norm": 5.181893825531006, "learning_rate": 2.864034482758621e-06, "loss": 0.1358, "step": 133950 }, { "epoch": 1.32, "grad_norm": 5.427099227905273, "learning_rate": 2.8636034482758623e-06, "loss": 0.3253, "step": 133975 }, { "epoch": 1.32, "grad_norm": 7.648345947265625, "learning_rate": 2.863172413793104e-06, "loss": 0.1629, "step": 134000 }, { "epoch": 1.32, "grad_norm": 9.0545654296875, "learning_rate": 2.862741379310345e-06, "loss": 0.322, "step": 134025 }, { "epoch": 1.32, "grad_norm": 11.672613143920898, "learning_rate": 2.8623103448275863e-06, "loss": 0.1649, "step": 134050 }, { "epoch": 1.32, "grad_norm": 4.535334587097168, "learning_rate": 2.8618793103448277e-06, "loss": 0.3111, "step": 134075 }, { "epoch": 1.32, "grad_norm": 8.706656455993652, "learning_rate": 2.861448275862069e-06, "loss": 0.1548, "step": 134100 }, { "epoch": 1.32, "grad_norm": 5.397627830505371, "learning_rate": 2.8610172413793106e-06, "loss": 0.2958, "step": 134125 }, { "epoch": 1.32, "grad_norm": 9.09106159210205, "learning_rate": 2.8605862068965517e-06, "loss": 0.1665, "step": 134150 }, { "epoch": 1.32, "grad_norm": 4.163392543792725, "learning_rate": 2.860155172413793e-06, "loss": 0.2849, "step": 134175 }, { "epoch": 1.32, "grad_norm": 8.92553997039795, "learning_rate": 2.8597241379310346e-06, "loss": 0.1397, "step": 134200 }, { "epoch": 1.32, "grad_norm": 10.687621116638184, "learning_rate": 2.8592931034482765e-06, "loss": 0.2851, "step": 134225 }, { "epoch": 1.32, "grad_norm": 6.463163375854492, "learning_rate": 2.858862068965517e-06, "loss": 0.1371, "step": 134250 }, { "epoch": 1.32, "grad_norm": 4.4840779304504395, "learning_rate": 2.8584310344827585e-06, "loss": 0.3006, "step": 134275 }, { "epoch": 1.32, "grad_norm": 6.04360818862915, "learning_rate": 2.8580000000000004e-06, "loss": 0.1391, "step": 134300 }, { "epoch": 1.32, "grad_norm": 6.144041061401367, "learning_rate": 2.857568965517242e-06, "loss": 0.2981, "step": 134325 }, { "epoch": 1.32, "grad_norm": 7.771648406982422, "learning_rate": 2.8571379310344833e-06, "loss": 0.1381, "step": 134350 }, { "epoch": 1.32, "grad_norm": 4.408580780029297, "learning_rate": 2.8567068965517244e-06, "loss": 0.31, "step": 134375 }, { "epoch": 1.32, "grad_norm": 4.267570972442627, "learning_rate": 2.856275862068966e-06, "loss": 0.1803, "step": 134400 }, { "epoch": 1.32, "grad_norm": 6.349417209625244, "learning_rate": 2.8558448275862073e-06, "loss": 0.3353, "step": 134425 }, { "epoch": 1.32, "grad_norm": 8.664112091064453, "learning_rate": 2.8554137931034487e-06, "loss": 0.1637, "step": 134450 }, { "epoch": 1.32, "grad_norm": 11.16759204864502, "learning_rate": 2.8549827586206898e-06, "loss": 0.3179, "step": 134475 }, { "epoch": 1.32, "grad_norm": 8.382109642028809, "learning_rate": 2.8545517241379312e-06, "loss": 0.1355, "step": 134500 }, { "epoch": 1.32, "grad_norm": 5.368013858795166, "learning_rate": 2.8541206896551727e-06, "loss": 0.2796, "step": 134525 }, { "epoch": 1.32, "grad_norm": 6.36500358581543, "learning_rate": 2.853689655172414e-06, "loss": 0.1223, "step": 134550 }, { "epoch": 1.32, "grad_norm": 4.630130767822266, "learning_rate": 2.8532586206896556e-06, "loss": 0.2578, "step": 134575 }, { "epoch": 1.32, "grad_norm": 11.194695472717285, "learning_rate": 2.8528275862068966e-06, "loss": 0.1711, "step": 134600 }, { "epoch": 1.32, "grad_norm": 4.48332405090332, "learning_rate": 2.852396551724138e-06, "loss": 0.2673, "step": 134625 }, { "epoch": 1.32, "grad_norm": 8.441535949707031, "learning_rate": 2.8519655172413795e-06, "loss": 0.1342, "step": 134650 }, { "epoch": 1.32, "grad_norm": 3.8572747707366943, "learning_rate": 2.851534482758621e-06, "loss": 0.2731, "step": 134675 }, { "epoch": 1.32, "grad_norm": 10.687954902648926, "learning_rate": 2.851103448275862e-06, "loss": 0.1202, "step": 134700 }, { "epoch": 1.33, "grad_norm": 5.145646572113037, "learning_rate": 2.8506724137931035e-06, "loss": 0.2977, "step": 134725 }, { "epoch": 1.33, "grad_norm": 8.544644355773926, "learning_rate": 2.850241379310345e-06, "loss": 0.1469, "step": 134750 }, { "epoch": 1.33, "grad_norm": 3.235098123550415, "learning_rate": 2.8498103448275864e-06, "loss": 0.2954, "step": 134775 }, { "epoch": 1.33, "grad_norm": 7.971279621124268, "learning_rate": 2.849379310344828e-06, "loss": 0.128, "step": 134800 }, { "epoch": 1.33, "grad_norm": 6.43930721282959, "learning_rate": 2.848948275862069e-06, "loss": 0.2875, "step": 134825 }, { "epoch": 1.33, "grad_norm": 9.395926475524902, "learning_rate": 2.8485172413793103e-06, "loss": 0.1568, "step": 134850 }, { "epoch": 1.33, "grad_norm": 3.6901814937591553, "learning_rate": 2.8481034482758622e-06, "loss": 0.2931, "step": 134875 }, { "epoch": 1.33, "grad_norm": 11.253026008605957, "learning_rate": 2.8476724137931037e-06, "loss": 0.1664, "step": 134900 }, { "epoch": 1.33, "grad_norm": 4.016238212585449, "learning_rate": 2.8472413793103447e-06, "loss": 0.2802, "step": 134925 }, { "epoch": 1.33, "grad_norm": 5.2043962478637695, "learning_rate": 2.846810344827586e-06, "loss": 0.149, "step": 134950 }, { "epoch": 1.33, "grad_norm": 3.42201828956604, "learning_rate": 2.8463793103448276e-06, "loss": 0.3232, "step": 134975 }, { "epoch": 1.33, "grad_norm": 6.579082489013672, "learning_rate": 2.8459482758620695e-06, "loss": 0.1281, "step": 135000 }, { "epoch": 1.33, "grad_norm": 3.2006871700286865, "learning_rate": 2.845517241379311e-06, "loss": 0.2384, "step": 135025 }, { "epoch": 1.33, "grad_norm": 6.77999210357666, "learning_rate": 2.845086206896552e-06, "loss": 0.1384, "step": 135050 }, { "epoch": 1.33, "grad_norm": 3.3241662979125977, "learning_rate": 2.8446551724137935e-06, "loss": 0.2576, "step": 135075 }, { "epoch": 1.33, "grad_norm": 9.345857620239258, "learning_rate": 2.844224137931035e-06, "loss": 0.1568, "step": 135100 }, { "epoch": 1.33, "grad_norm": 5.50201416015625, "learning_rate": 2.8437931034482764e-06, "loss": 0.2748, "step": 135125 }, { "epoch": 1.33, "grad_norm": 7.213361740112305, "learning_rate": 2.8433620689655174e-06, "loss": 0.1479, "step": 135150 }, { "epoch": 1.33, "grad_norm": 4.0518693923950195, "learning_rate": 2.842931034482759e-06, "loss": 0.3186, "step": 135175 }, { "epoch": 1.33, "grad_norm": 14.51462173461914, "learning_rate": 2.8425000000000003e-06, "loss": 0.1349, "step": 135200 }, { "epoch": 1.33, "grad_norm": 5.198193550109863, "learning_rate": 2.8420689655172418e-06, "loss": 0.2909, "step": 135225 }, { "epoch": 1.33, "grad_norm": 9.512296676635742, "learning_rate": 2.8416379310344832e-06, "loss": 0.1279, "step": 135250 }, { "epoch": 1.33, "grad_norm": 3.7764999866485596, "learning_rate": 2.8412068965517243e-06, "loss": 0.2947, "step": 135275 }, { "epoch": 1.33, "grad_norm": 11.265081405639648, "learning_rate": 2.8407758620689657e-06, "loss": 0.1393, "step": 135300 }, { "epoch": 1.33, "grad_norm": 3.6920156478881836, "learning_rate": 2.840344827586207e-06, "loss": 0.2794, "step": 135325 }, { "epoch": 1.33, "grad_norm": 6.237368106842041, "learning_rate": 2.8399137931034486e-06, "loss": 0.145, "step": 135350 }, { "epoch": 1.33, "grad_norm": 7.285731792449951, "learning_rate": 2.8394827586206897e-06, "loss": 0.3343, "step": 135375 }, { "epoch": 1.33, "grad_norm": 16.805339813232422, "learning_rate": 2.839051724137931e-06, "loss": 0.1279, "step": 135400 }, { "epoch": 1.33, "grad_norm": 3.635619640350342, "learning_rate": 2.8386206896551726e-06, "loss": 0.3396, "step": 135425 }, { "epoch": 1.33, "grad_norm": 12.960861206054688, "learning_rate": 2.838189655172414e-06, "loss": 0.1383, "step": 135450 }, { "epoch": 1.33, "grad_norm": 4.255285263061523, "learning_rate": 2.837758620689655e-06, "loss": 0.3498, "step": 135475 }, { "epoch": 1.33, "grad_norm": 9.461050033569336, "learning_rate": 2.8373275862068965e-06, "loss": 0.1664, "step": 135500 }, { "epoch": 1.33, "grad_norm": 7.100996017456055, "learning_rate": 2.836896551724138e-06, "loss": 0.2672, "step": 135525 }, { "epoch": 1.33, "grad_norm": 6.719037055969238, "learning_rate": 2.8364655172413794e-06, "loss": 0.1374, "step": 135550 }, { "epoch": 1.33, "grad_norm": 4.088957786560059, "learning_rate": 2.8360344827586213e-06, "loss": 0.2845, "step": 135575 }, { "epoch": 1.33, "grad_norm": 9.041851997375488, "learning_rate": 2.835603448275862e-06, "loss": 0.1394, "step": 135600 }, { "epoch": 1.33, "grad_norm": 4.232125759124756, "learning_rate": 2.835172413793104e-06, "loss": 0.2248, "step": 135625 }, { "epoch": 1.33, "grad_norm": 12.63174819946289, "learning_rate": 2.8347413793103453e-06, "loss": 0.1702, "step": 135650 }, { "epoch": 1.33, "grad_norm": 4.969602108001709, "learning_rate": 2.8343103448275867e-06, "loss": 0.3397, "step": 135675 }, { "epoch": 1.33, "grad_norm": 9.283196449279785, "learning_rate": 2.8338793103448277e-06, "loss": 0.1508, "step": 135700 }, { "epoch": 1.33, "grad_norm": 4.786964416503906, "learning_rate": 2.833448275862069e-06, "loss": 0.3131, "step": 135725 }, { "epoch": 1.34, "grad_norm": 5.493376731872559, "learning_rate": 2.8330172413793107e-06, "loss": 0.1495, "step": 135750 }, { "epoch": 1.34, "grad_norm": 3.3340232372283936, "learning_rate": 2.832586206896552e-06, "loss": 0.295, "step": 135775 }, { "epoch": 1.34, "grad_norm": 12.152433395385742, "learning_rate": 2.832172413793104e-06, "loss": 0.1575, "step": 135800 }, { "epoch": 1.34, "grad_norm": 4.499579429626465, "learning_rate": 2.831741379310345e-06, "loss": 0.272, "step": 135825 }, { "epoch": 1.34, "grad_norm": 8.798830032348633, "learning_rate": 2.8313103448275865e-06, "loss": 0.1355, "step": 135850 }, { "epoch": 1.34, "grad_norm": 5.648239612579346, "learning_rate": 2.830879310344828e-06, "loss": 0.3073, "step": 135875 }, { "epoch": 1.34, "grad_norm": 10.242704391479492, "learning_rate": 2.8304482758620694e-06, "loss": 0.1413, "step": 135900 }, { "epoch": 1.34, "grad_norm": 4.470166206359863, "learning_rate": 2.8300172413793104e-06, "loss": 0.2642, "step": 135925 }, { "epoch": 1.34, "grad_norm": 7.015706539154053, "learning_rate": 2.829586206896552e-06, "loss": 0.1716, "step": 135950 }, { "epoch": 1.34, "grad_norm": 4.920144081115723, "learning_rate": 2.8291551724137933e-06, "loss": 0.2378, "step": 135975 }, { "epoch": 1.34, "grad_norm": 7.353917598724365, "learning_rate": 2.828724137931035e-06, "loss": 0.1685, "step": 136000 }, { "epoch": 1.34, "grad_norm": 3.8427248001098633, "learning_rate": 2.8282931034482763e-06, "loss": 0.2533, "step": 136025 }, { "epoch": 1.34, "grad_norm": 6.394281387329102, "learning_rate": 2.8278620689655173e-06, "loss": 0.1302, "step": 136050 }, { "epoch": 1.34, "grad_norm": 4.109683513641357, "learning_rate": 2.8274310344827588e-06, "loss": 0.2668, "step": 136075 }, { "epoch": 1.34, "grad_norm": 6.6450514793396, "learning_rate": 2.827e-06, "loss": 0.1232, "step": 136100 }, { "epoch": 1.34, "grad_norm": 3.519195795059204, "learning_rate": 2.8265689655172417e-06, "loss": 0.2415, "step": 136125 }, { "epoch": 1.34, "grad_norm": 9.961033821105957, "learning_rate": 2.8261379310344827e-06, "loss": 0.1257, "step": 136150 }, { "epoch": 1.34, "grad_norm": 4.977418899536133, "learning_rate": 2.825706896551724e-06, "loss": 0.3142, "step": 136175 }, { "epoch": 1.34, "grad_norm": 8.283841133117676, "learning_rate": 2.8252758620689656e-06, "loss": 0.1481, "step": 136200 }, { "epoch": 1.34, "grad_norm": 5.116231441497803, "learning_rate": 2.824844827586207e-06, "loss": 0.2658, "step": 136225 }, { "epoch": 1.34, "grad_norm": 7.589305400848389, "learning_rate": 2.824413793103449e-06, "loss": 0.1665, "step": 136250 }, { "epoch": 1.34, "grad_norm": 5.327960014343262, "learning_rate": 2.8239827586206896e-06, "loss": 0.2775, "step": 136275 }, { "epoch": 1.34, "grad_norm": 6.887369155883789, "learning_rate": 2.823551724137931e-06, "loss": 0.1425, "step": 136300 }, { "epoch": 1.34, "grad_norm": 3.7179791927337646, "learning_rate": 2.823120689655173e-06, "loss": 0.2262, "step": 136325 }, { "epoch": 1.34, "grad_norm": 5.641063213348389, "learning_rate": 2.8226896551724143e-06, "loss": 0.1643, "step": 136350 }, { "epoch": 1.34, "grad_norm": 4.149481773376465, "learning_rate": 2.8222586206896554e-06, "loss": 0.2672, "step": 136375 }, { "epoch": 1.34, "grad_norm": 5.995400905609131, "learning_rate": 2.821827586206897e-06, "loss": 0.1335, "step": 136400 }, { "epoch": 1.34, "grad_norm": 5.303042411804199, "learning_rate": 2.8213965517241383e-06, "loss": 0.305, "step": 136425 }, { "epoch": 1.34, "grad_norm": 12.462788581848145, "learning_rate": 2.8209655172413798e-06, "loss": 0.1356, "step": 136450 }, { "epoch": 1.34, "grad_norm": 3.6034815311431885, "learning_rate": 2.820534482758621e-06, "loss": 0.3635, "step": 136475 }, { "epoch": 1.34, "grad_norm": 6.1948981285095215, "learning_rate": 2.8201034482758622e-06, "loss": 0.1378, "step": 136500 }, { "epoch": 1.34, "grad_norm": 3.7872865200042725, "learning_rate": 2.8196724137931037e-06, "loss": 0.2489, "step": 136525 }, { "epoch": 1.34, "grad_norm": 6.912138938903809, "learning_rate": 2.819241379310345e-06, "loss": 0.1418, "step": 136550 }, { "epoch": 1.34, "grad_norm": 4.666214466094971, "learning_rate": 2.8188103448275866e-06, "loss": 0.3192, "step": 136575 }, { "epoch": 1.34, "grad_norm": 10.527531623840332, "learning_rate": 2.8183793103448276e-06, "loss": 0.1265, "step": 136600 }, { "epoch": 1.34, "grad_norm": 3.589970827102661, "learning_rate": 2.817948275862069e-06, "loss": 0.3207, "step": 136625 }, { "epoch": 1.34, "grad_norm": 5.095157623291016, "learning_rate": 2.8175172413793106e-06, "loss": 0.1594, "step": 136650 }, { "epoch": 1.34, "grad_norm": 4.7501139640808105, "learning_rate": 2.817086206896552e-06, "loss": 0.354, "step": 136675 }, { "epoch": 1.34, "grad_norm": 9.559643745422363, "learning_rate": 2.8166551724137935e-06, "loss": 0.1216, "step": 136700 }, { "epoch": 1.34, "grad_norm": 4.227741718292236, "learning_rate": 2.8162241379310345e-06, "loss": 0.3124, "step": 136725 }, { "epoch": 1.34, "grad_norm": 6.7969512939453125, "learning_rate": 2.815793103448276e-06, "loss": 0.1424, "step": 136750 }, { "epoch": 1.35, "grad_norm": 4.079679012298584, "learning_rate": 2.8153620689655174e-06, "loss": 0.3046, "step": 136775 }, { "epoch": 1.35, "grad_norm": 7.34274959564209, "learning_rate": 2.814931034482759e-06, "loss": 0.1766, "step": 136800 }, { "epoch": 1.35, "grad_norm": 4.973370552062988, "learning_rate": 2.8145e-06, "loss": 0.3408, "step": 136825 }, { "epoch": 1.35, "grad_norm": 10.88292407989502, "learning_rate": 2.8140689655172414e-06, "loss": 0.144, "step": 136850 }, { "epoch": 1.35, "grad_norm": 4.866835117340088, "learning_rate": 2.813637931034483e-06, "loss": 0.3065, "step": 136875 }, { "epoch": 1.35, "grad_norm": 8.5775146484375, "learning_rate": 2.8132068965517247e-06, "loss": 0.1242, "step": 136900 }, { "epoch": 1.35, "grad_norm": 4.698543071746826, "learning_rate": 2.812775862068966e-06, "loss": 0.2855, "step": 136925 }, { "epoch": 1.35, "grad_norm": 12.514504432678223, "learning_rate": 2.8123448275862068e-06, "loss": 0.1428, "step": 136950 }, { "epoch": 1.35, "grad_norm": 5.986462116241455, "learning_rate": 2.8119137931034486e-06, "loss": 0.2585, "step": 136975 }, { "epoch": 1.35, "grad_norm": 10.747640609741211, "learning_rate": 2.81148275862069e-06, "loss": 0.1315, "step": 137000 }, { "epoch": 1.35, "grad_norm": 4.77714729309082, "learning_rate": 2.8110517241379316e-06, "loss": 0.2676, "step": 137025 }, { "epoch": 1.35, "grad_norm": 0.8533857464790344, "learning_rate": 2.8106206896551726e-06, "loss": 0.1305, "step": 137050 }, { "epoch": 1.35, "grad_norm": 4.101401329040527, "learning_rate": 2.810189655172414e-06, "loss": 0.2967, "step": 137075 }, { "epoch": 1.35, "grad_norm": 11.066786766052246, "learning_rate": 2.8097586206896555e-06, "loss": 0.1467, "step": 137100 }, { "epoch": 1.35, "grad_norm": 4.571308135986328, "learning_rate": 2.809327586206897e-06, "loss": 0.3064, "step": 137125 }, { "epoch": 1.35, "grad_norm": 5.756674766540527, "learning_rate": 2.808896551724138e-06, "loss": 0.1632, "step": 137150 }, { "epoch": 1.35, "grad_norm": 4.427364826202393, "learning_rate": 2.8084655172413794e-06, "loss": 0.2826, "step": 137175 }, { "epoch": 1.35, "grad_norm": 6.729334354400635, "learning_rate": 2.808034482758621e-06, "loss": 0.1353, "step": 137200 }, { "epoch": 1.35, "grad_norm": 7.802757263183594, "learning_rate": 2.8076034482758624e-06, "loss": 0.2438, "step": 137225 }, { "epoch": 1.35, "grad_norm": 16.53919219970703, "learning_rate": 2.807172413793104e-06, "loss": 0.1506, "step": 137250 }, { "epoch": 1.35, "grad_norm": 5.113104343414307, "learning_rate": 2.806741379310345e-06, "loss": 0.2697, "step": 137275 }, { "epoch": 1.35, "grad_norm": 10.161168098449707, "learning_rate": 2.8063103448275863e-06, "loss": 0.2058, "step": 137300 }, { "epoch": 1.35, "grad_norm": 3.5522379875183105, "learning_rate": 2.8058793103448278e-06, "loss": 0.3241, "step": 137325 }, { "epoch": 1.35, "grad_norm": 10.195577621459961, "learning_rate": 2.8054482758620692e-06, "loss": 0.128, "step": 137350 }, { "epoch": 1.35, "grad_norm": 3.6975009441375732, "learning_rate": 2.8050172413793103e-06, "loss": 0.2836, "step": 137375 }, { "epoch": 1.35, "grad_norm": 4.280025959014893, "learning_rate": 2.8045862068965517e-06, "loss": 0.1594, "step": 137400 }, { "epoch": 1.35, "grad_norm": 4.98085880279541, "learning_rate": 2.804155172413793e-06, "loss": 0.2315, "step": 137425 }, { "epoch": 1.35, "grad_norm": 9.246023178100586, "learning_rate": 2.8037241379310346e-06, "loss": 0.1739, "step": 137450 }, { "epoch": 1.35, "grad_norm": 5.0977559089660645, "learning_rate": 2.8032931034482765e-06, "loss": 0.2707, "step": 137475 }, { "epoch": 1.35, "grad_norm": 12.266904830932617, "learning_rate": 2.802862068965517e-06, "loss": 0.1588, "step": 137500 }, { "epoch": 1.35, "grad_norm": 5.635280132293701, "learning_rate": 2.8024310344827586e-06, "loss": 0.3543, "step": 137525 }, { "epoch": 1.35, "grad_norm": 5.936238765716553, "learning_rate": 2.8020000000000004e-06, "loss": 0.1703, "step": 137550 }, { "epoch": 1.35, "grad_norm": 5.962284564971924, "learning_rate": 2.801568965517242e-06, "loss": 0.3002, "step": 137575 }, { "epoch": 1.35, "grad_norm": 2.5598604679107666, "learning_rate": 2.801137931034483e-06, "loss": 0.1344, "step": 137600 }, { "epoch": 1.35, "grad_norm": 5.017495155334473, "learning_rate": 2.8007068965517244e-06, "loss": 0.2807, "step": 137625 }, { "epoch": 1.35, "grad_norm": 10.631511688232422, "learning_rate": 2.800275862068966e-06, "loss": 0.1512, "step": 137650 }, { "epoch": 1.35, "grad_norm": 6.63815450668335, "learning_rate": 2.7998448275862073e-06, "loss": 0.2984, "step": 137675 }, { "epoch": 1.35, "grad_norm": 9.2888765335083, "learning_rate": 2.7994137931034488e-06, "loss": 0.1512, "step": 137700 }, { "epoch": 1.35, "grad_norm": 4.879033088684082, "learning_rate": 2.79898275862069e-06, "loss": 0.2875, "step": 137725 }, { "epoch": 1.35, "grad_norm": 7.380196571350098, "learning_rate": 2.7985517241379313e-06, "loss": 0.148, "step": 137750 }, { "epoch": 1.36, "grad_norm": 3.9697916507720947, "learning_rate": 2.7981206896551727e-06, "loss": 0.2865, "step": 137775 }, { "epoch": 1.36, "grad_norm": 4.8892822265625, "learning_rate": 2.797689655172414e-06, "loss": 0.1354, "step": 137800 }, { "epoch": 1.36, "grad_norm": 3.807408571243286, "learning_rate": 2.797258620689655e-06, "loss": 0.2377, "step": 137825 }, { "epoch": 1.36, "grad_norm": 10.585224151611328, "learning_rate": 2.7968275862068967e-06, "loss": 0.1235, "step": 137850 }, { "epoch": 1.36, "grad_norm": 4.030787944793701, "learning_rate": 2.796396551724138e-06, "loss": 0.2361, "step": 137875 }, { "epoch": 1.36, "grad_norm": 9.782261848449707, "learning_rate": 2.7959655172413796e-06, "loss": 0.1708, "step": 137900 }, { "epoch": 1.36, "grad_norm": 4.930845260620117, "learning_rate": 2.795534482758621e-06, "loss": 0.3191, "step": 137925 }, { "epoch": 1.36, "grad_norm": 12.919523239135742, "learning_rate": 2.795103448275862e-06, "loss": 0.162, "step": 137950 }, { "epoch": 1.36, "grad_norm": 11.043449401855469, "learning_rate": 2.7946724137931035e-06, "loss": 0.3329, "step": 137975 }, { "epoch": 1.36, "grad_norm": 11.03730297088623, "learning_rate": 2.794241379310345e-06, "loss": 0.1324, "step": 138000 }, { "epoch": 1.36, "grad_norm": 4.631793022155762, "learning_rate": 2.7938103448275864e-06, "loss": 0.2458, "step": 138025 }, { "epoch": 1.36, "grad_norm": 9.601707458496094, "learning_rate": 2.7933793103448275e-06, "loss": 0.153, "step": 138050 }, { "epoch": 1.36, "grad_norm": 4.9829182624816895, "learning_rate": 2.792948275862069e-06, "loss": 0.2532, "step": 138075 }, { "epoch": 1.36, "grad_norm": 10.706036567687988, "learning_rate": 2.7925172413793104e-06, "loss": 0.1481, "step": 138100 }, { "epoch": 1.36, "grad_norm": 4.1928253173828125, "learning_rate": 2.7920862068965523e-06, "loss": 0.2683, "step": 138125 }, { "epoch": 1.36, "grad_norm": 8.524100303649902, "learning_rate": 2.7916551724137937e-06, "loss": 0.1771, "step": 138150 }, { "epoch": 1.36, "grad_norm": 4.193044662475586, "learning_rate": 2.7912241379310343e-06, "loss": 0.2778, "step": 138175 }, { "epoch": 1.36, "grad_norm": 9.257637023925781, "learning_rate": 2.790793103448276e-06, "loss": 0.1444, "step": 138200 }, { "epoch": 1.36, "grad_norm": 5.422263145446777, "learning_rate": 2.7903620689655177e-06, "loss": 0.2745, "step": 138225 }, { "epoch": 1.36, "grad_norm": 9.039228439331055, "learning_rate": 2.789931034482759e-06, "loss": 0.134, "step": 138250 }, { "epoch": 1.36, "grad_norm": 3.694080114364624, "learning_rate": 2.7895e-06, "loss": 0.2652, "step": 138275 }, { "epoch": 1.36, "grad_norm": 8.579140663146973, "learning_rate": 2.7890689655172416e-06, "loss": 0.1139, "step": 138300 }, { "epoch": 1.36, "grad_norm": 7.287327766418457, "learning_rate": 2.788637931034483e-06, "loss": 0.2644, "step": 138325 }, { "epoch": 1.36, "grad_norm": 5.289127826690674, "learning_rate": 2.7882068965517245e-06, "loss": 0.1617, "step": 138350 }, { "epoch": 1.36, "grad_norm": 4.921135902404785, "learning_rate": 2.787775862068966e-06, "loss": 0.2954, "step": 138375 }, { "epoch": 1.36, "grad_norm": 11.425599098205566, "learning_rate": 2.787344827586207e-06, "loss": 0.1424, "step": 138400 }, { "epoch": 1.36, "grad_norm": 3.6476635932922363, "learning_rate": 2.7869137931034485e-06, "loss": 0.2394, "step": 138425 }, { "epoch": 1.36, "grad_norm": 6.503156661987305, "learning_rate": 2.78648275862069e-06, "loss": 0.1613, "step": 138450 }, { "epoch": 1.36, "grad_norm": 4.931598663330078, "learning_rate": 2.7860517241379314e-06, "loss": 0.3079, "step": 138475 }, { "epoch": 1.36, "grad_norm": 7.089020252227783, "learning_rate": 2.7856206896551724e-06, "loss": 0.1658, "step": 138500 }, { "epoch": 1.36, "grad_norm": 5.891905307769775, "learning_rate": 2.785189655172414e-06, "loss": 0.2659, "step": 138525 }, { "epoch": 1.36, "grad_norm": 8.064496994018555, "learning_rate": 2.7847586206896553e-06, "loss": 0.1148, "step": 138550 }, { "epoch": 1.36, "grad_norm": 6.812765598297119, "learning_rate": 2.7843275862068968e-06, "loss": 0.2611, "step": 138575 }, { "epoch": 1.36, "grad_norm": 8.092068672180176, "learning_rate": 2.7838965517241382e-06, "loss": 0.1373, "step": 138600 }, { "epoch": 1.36, "grad_norm": 4.833127498626709, "learning_rate": 2.7834655172413793e-06, "loss": 0.2892, "step": 138625 }, { "epoch": 1.36, "grad_norm": 15.987662315368652, "learning_rate": 2.7830344827586207e-06, "loss": 0.1142, "step": 138650 }, { "epoch": 1.36, "grad_norm": 3.906860113143921, "learning_rate": 2.782603448275862e-06, "loss": 0.2753, "step": 138675 }, { "epoch": 1.36, "grad_norm": 13.704325675964355, "learning_rate": 2.782172413793104e-06, "loss": 0.1347, "step": 138700 }, { "epoch": 1.36, "grad_norm": 4.953297138214111, "learning_rate": 2.7817413793103447e-06, "loss": 0.2752, "step": 138725 }, { "epoch": 1.36, "grad_norm": 6.566116809844971, "learning_rate": 2.781310344827586e-06, "loss": 0.1013, "step": 138750 }, { "epoch": 1.36, "grad_norm": 6.279494762420654, "learning_rate": 2.780879310344828e-06, "loss": 0.3504, "step": 138775 }, { "epoch": 1.37, "grad_norm": 8.185515403747559, "learning_rate": 2.7804482758620695e-06, "loss": 0.1594, "step": 138800 }, { "epoch": 1.37, "grad_norm": 3.5253820419311523, "learning_rate": 2.780017241379311e-06, "loss": 0.299, "step": 138825 }, { "epoch": 1.37, "grad_norm": 3.752148389816284, "learning_rate": 2.779586206896552e-06, "loss": 0.1554, "step": 138850 }, { "epoch": 1.37, "grad_norm": 5.211942672729492, "learning_rate": 2.7791551724137934e-06, "loss": 0.254, "step": 138875 }, { "epoch": 1.37, "grad_norm": 10.78726863861084, "learning_rate": 2.778724137931035e-06, "loss": 0.138, "step": 138900 }, { "epoch": 1.37, "grad_norm": 4.326992988586426, "learning_rate": 2.7782931034482763e-06, "loss": 0.3553, "step": 138925 }, { "epoch": 1.37, "grad_norm": 10.850645065307617, "learning_rate": 2.7778620689655173e-06, "loss": 0.1413, "step": 138950 }, { "epoch": 1.37, "grad_norm": 7.78289794921875, "learning_rate": 2.777431034482759e-06, "loss": 0.2607, "step": 138975 }, { "epoch": 1.37, "grad_norm": 7.8885579109191895, "learning_rate": 2.7770000000000003e-06, "loss": 0.1301, "step": 139000 }, { "epoch": 1.37, "grad_norm": 5.057398319244385, "learning_rate": 2.7765689655172417e-06, "loss": 0.2904, "step": 139025 }, { "epoch": 1.37, "grad_norm": 10.02153205871582, "learning_rate": 2.776137931034483e-06, "loss": 0.1566, "step": 139050 }, { "epoch": 1.37, "grad_norm": 6.702133655548096, "learning_rate": 2.775706896551724e-06, "loss": 0.334, "step": 139075 }, { "epoch": 1.37, "grad_norm": 3.4965877532958984, "learning_rate": 2.7752758620689657e-06, "loss": 0.1043, "step": 139100 }, { "epoch": 1.37, "grad_norm": 3.0512373447418213, "learning_rate": 2.774844827586207e-06, "loss": 0.2673, "step": 139125 }, { "epoch": 1.37, "grad_norm": 6.708845615386963, "learning_rate": 2.7744137931034486e-06, "loss": 0.1325, "step": 139150 }, { "epoch": 1.37, "grad_norm": 9.056169509887695, "learning_rate": 2.7739827586206896e-06, "loss": 0.3095, "step": 139175 }, { "epoch": 1.37, "grad_norm": 3.7614893913269043, "learning_rate": 2.773551724137931e-06, "loss": 0.1283, "step": 139200 }, { "epoch": 1.37, "grad_norm": 5.450809955596924, "learning_rate": 2.7731206896551725e-06, "loss": 0.2602, "step": 139225 }, { "epoch": 1.37, "grad_norm": 7.247802734375, "learning_rate": 2.772689655172414e-06, "loss": 0.1486, "step": 139250 }, { "epoch": 1.37, "grad_norm": 4.329941272735596, "learning_rate": 2.772258620689655e-06, "loss": 0.309, "step": 139275 }, { "epoch": 1.37, "grad_norm": 11.352745056152344, "learning_rate": 2.7718275862068965e-06, "loss": 0.1592, "step": 139300 }, { "epoch": 1.37, "grad_norm": 7.045497894287109, "learning_rate": 2.771396551724138e-06, "loss": 0.3242, "step": 139325 }, { "epoch": 1.37, "grad_norm": 15.850481033325195, "learning_rate": 2.77096551724138e-06, "loss": 0.1307, "step": 139350 }, { "epoch": 1.37, "grad_norm": 3.897831678390503, "learning_rate": 2.7705344827586213e-06, "loss": 0.2297, "step": 139375 }, { "epoch": 1.37, "grad_norm": 7.442441463470459, "learning_rate": 2.770103448275862e-06, "loss": 0.1354, "step": 139400 }, { "epoch": 1.37, "grad_norm": 3.9902138710021973, "learning_rate": 2.7696724137931038e-06, "loss": 0.2509, "step": 139425 }, { "epoch": 1.37, "grad_norm": 5.140393257141113, "learning_rate": 2.769241379310345e-06, "loss": 0.1442, "step": 139450 }, { "epoch": 1.37, "grad_norm": 7.178745746612549, "learning_rate": 2.7688103448275867e-06, "loss": 0.3175, "step": 139475 }, { "epoch": 1.37, "grad_norm": 10.948570251464844, "learning_rate": 2.7683793103448277e-06, "loss": 0.1731, "step": 139500 }, { "epoch": 1.37, "grad_norm": 4.8676042556762695, "learning_rate": 2.767948275862069e-06, "loss": 0.3052, "step": 139525 }, { "epoch": 1.37, "grad_norm": 7.465070724487305, "learning_rate": 2.7675172413793106e-06, "loss": 0.1281, "step": 139550 }, { "epoch": 1.37, "grad_norm": 6.104016304016113, "learning_rate": 2.767086206896552e-06, "loss": 0.3161, "step": 139575 }, { "epoch": 1.37, "grad_norm": 11.669439315795898, "learning_rate": 2.7666551724137935e-06, "loss": 0.1745, "step": 139600 }, { "epoch": 1.37, "grad_norm": 6.145212173461914, "learning_rate": 2.7662241379310346e-06, "loss": 0.2891, "step": 139625 }, { "epoch": 1.37, "grad_norm": 6.910345077514648, "learning_rate": 2.765793103448276e-06, "loss": 0.1393, "step": 139650 }, { "epoch": 1.37, "grad_norm": 4.007335186004639, "learning_rate": 2.7653620689655175e-06, "loss": 0.2865, "step": 139675 }, { "epoch": 1.37, "grad_norm": 7.5959553718566895, "learning_rate": 2.764931034482759e-06, "loss": 0.1559, "step": 139700 }, { "epoch": 1.37, "grad_norm": 4.552664756774902, "learning_rate": 2.7645e-06, "loss": 0.2495, "step": 139725 }, { "epoch": 1.37, "grad_norm": 9.20119857788086, "learning_rate": 2.7640689655172414e-06, "loss": 0.1513, "step": 139750 }, { "epoch": 1.37, "grad_norm": 4.470068454742432, "learning_rate": 2.763637931034483e-06, "loss": 0.1991, "step": 139775 }, { "epoch": 1.37, "grad_norm": 20.028648376464844, "learning_rate": 2.7632068965517243e-06, "loss": 0.1368, "step": 139800 }, { "epoch": 1.38, "grad_norm": 4.762571811676025, "learning_rate": 2.7627758620689658e-06, "loss": 0.2693, "step": 139825 }, { "epoch": 1.38, "grad_norm": 10.023582458496094, "learning_rate": 2.762344827586207e-06, "loss": 0.1174, "step": 139850 }, { "epoch": 1.38, "grad_norm": 4.169708251953125, "learning_rate": 2.7619137931034483e-06, "loss": 0.2903, "step": 139875 }, { "epoch": 1.38, "grad_norm": 15.375880241394043, "learning_rate": 2.7614827586206897e-06, "loss": 0.1334, "step": 139900 }, { "epoch": 1.38, "grad_norm": 4.775535583496094, "learning_rate": 2.7610689655172416e-06, "loss": 0.2582, "step": 139925 }, { "epoch": 1.38, "grad_norm": 11.23787784576416, "learning_rate": 2.7606379310344826e-06, "loss": 0.1591, "step": 139950 }, { "epoch": 1.38, "grad_norm": 4.9644036293029785, "learning_rate": 2.760206896551724e-06, "loss": 0.2812, "step": 139975 }, { "epoch": 1.38, "grad_norm": 14.04173469543457, "learning_rate": 2.7597758620689656e-06, "loss": 0.1513, "step": 140000 }, { "epoch": 1.38, "eval_loss": 0.5329837203025818, "eval_runtime": 5738.4139, "eval_samples_per_second": 1.65, "eval_steps_per_second": 0.206, "eval_wer": 0.12864546449494624, "step": 140000 }, { "epoch": 1.38, "grad_norm": 4.799410343170166, "learning_rate": 2.7593448275862074e-06, "loss": 0.3179, "step": 140025 }, { "epoch": 1.38, "grad_norm": 5.029453277587891, "learning_rate": 2.758913793103449e-06, "loss": 0.1485, "step": 140050 }, { "epoch": 1.38, "grad_norm": 7.112915515899658, "learning_rate": 2.7584827586206895e-06, "loss": 0.2998, "step": 140075 }, { "epoch": 1.38, "grad_norm": 8.919066429138184, "learning_rate": 2.7580517241379314e-06, "loss": 0.1364, "step": 140100 }, { "epoch": 1.38, "grad_norm": 3.575777292251587, "learning_rate": 2.757620689655173e-06, "loss": 0.2804, "step": 140125 }, { "epoch": 1.38, "grad_norm": 5.645355701446533, "learning_rate": 2.7571896551724143e-06, "loss": 0.1177, "step": 140150 }, { "epoch": 1.38, "grad_norm": 3.6116459369659424, "learning_rate": 2.7567586206896553e-06, "loss": 0.3284, "step": 140175 }, { "epoch": 1.38, "grad_norm": 5.928357124328613, "learning_rate": 2.7563275862068968e-06, "loss": 0.18, "step": 140200 }, { "epoch": 1.38, "grad_norm": 5.819464206695557, "learning_rate": 2.7558965517241382e-06, "loss": 0.2514, "step": 140225 }, { "epoch": 1.38, "grad_norm": 15.015626907348633, "learning_rate": 2.7554655172413797e-06, "loss": 0.1304, "step": 140250 }, { "epoch": 1.38, "grad_norm": 4.277591705322266, "learning_rate": 2.755034482758621e-06, "loss": 0.2881, "step": 140275 }, { "epoch": 1.38, "grad_norm": 11.189580917358398, "learning_rate": 2.754603448275862e-06, "loss": 0.1421, "step": 140300 }, { "epoch": 1.38, "grad_norm": 4.589609146118164, "learning_rate": 2.7541724137931036e-06, "loss": 0.2887, "step": 140325 }, { "epoch": 1.38, "grad_norm": 6.540769100189209, "learning_rate": 2.753741379310345e-06, "loss": 0.1341, "step": 140350 }, { "epoch": 1.38, "grad_norm": 3.620026111602783, "learning_rate": 2.7533103448275866e-06, "loss": 0.3068, "step": 140375 }, { "epoch": 1.38, "grad_norm": 11.229677200317383, "learning_rate": 2.7528793103448276e-06, "loss": 0.1266, "step": 140400 }, { "epoch": 1.38, "grad_norm": 3.4444329738616943, "learning_rate": 2.752448275862069e-06, "loss": 0.275, "step": 140425 }, { "epoch": 1.38, "grad_norm": 5.372973918914795, "learning_rate": 2.7520172413793105e-06, "loss": 0.1294, "step": 140450 }, { "epoch": 1.38, "grad_norm": 4.832029819488525, "learning_rate": 2.751586206896552e-06, "loss": 0.2677, "step": 140475 }, { "epoch": 1.38, "grad_norm": 9.248211860656738, "learning_rate": 2.7511551724137934e-06, "loss": 0.152, "step": 140500 }, { "epoch": 1.38, "grad_norm": 4.859995365142822, "learning_rate": 2.7507241379310345e-06, "loss": 0.257, "step": 140525 }, { "epoch": 1.38, "grad_norm": 18.150493621826172, "learning_rate": 2.750293103448276e-06, "loss": 0.1619, "step": 140550 }, { "epoch": 1.38, "grad_norm": 4.414128303527832, "learning_rate": 2.7498620689655174e-06, "loss": 0.2782, "step": 140575 }, { "epoch": 1.38, "grad_norm": 4.599401950836182, "learning_rate": 2.7494310344827592e-06, "loss": 0.1323, "step": 140600 }, { "epoch": 1.38, "grad_norm": 4.607368469238281, "learning_rate": 2.749e-06, "loss": 0.2886, "step": 140625 }, { "epoch": 1.38, "grad_norm": 12.778974533081055, "learning_rate": 2.7485689655172413e-06, "loss": 0.1348, "step": 140650 }, { "epoch": 1.38, "grad_norm": 5.523165225982666, "learning_rate": 2.748137931034483e-06, "loss": 0.2622, "step": 140675 }, { "epoch": 1.38, "grad_norm": 10.879575729370117, "learning_rate": 2.7477068965517246e-06, "loss": 0.1198, "step": 140700 }, { "epoch": 1.38, "grad_norm": 4.456562519073486, "learning_rate": 2.747275862068966e-06, "loss": 0.2995, "step": 140725 }, { "epoch": 1.38, "grad_norm": 6.936779022216797, "learning_rate": 2.746844827586207e-06, "loss": 0.1401, "step": 140750 }, { "epoch": 1.38, "grad_norm": 9.714559555053711, "learning_rate": 2.7464137931034486e-06, "loss": 0.2896, "step": 140775 }, { "epoch": 1.38, "grad_norm": 1.9221582412719727, "learning_rate": 2.74598275862069e-06, "loss": 0.1375, "step": 140800 }, { "epoch": 1.39, "grad_norm": 3.8037283420562744, "learning_rate": 2.7455517241379315e-06, "loss": 0.3257, "step": 140825 }, { "epoch": 1.39, "grad_norm": 17.8128662109375, "learning_rate": 2.7451206896551725e-06, "loss": 0.1676, "step": 140850 }, { "epoch": 1.39, "grad_norm": 4.427530288696289, "learning_rate": 2.744689655172414e-06, "loss": 0.3295, "step": 140875 }, { "epoch": 1.39, "grad_norm": 6.711950302124023, "learning_rate": 2.7442586206896555e-06, "loss": 0.1702, "step": 140900 }, { "epoch": 1.39, "grad_norm": 5.003377437591553, "learning_rate": 2.743827586206897e-06, "loss": 0.2905, "step": 140925 }, { "epoch": 1.39, "grad_norm": 9.320756912231445, "learning_rate": 2.743396551724138e-06, "loss": 0.1269, "step": 140950 }, { "epoch": 1.39, "grad_norm": 5.713409900665283, "learning_rate": 2.7429655172413794e-06, "loss": 0.2651, "step": 140975 }, { "epoch": 1.39, "grad_norm": 1.0310051441192627, "learning_rate": 2.742534482758621e-06, "loss": 0.1282, "step": 141000 }, { "epoch": 1.39, "grad_norm": 4.490190505981445, "learning_rate": 2.7421034482758623e-06, "loss": 0.2791, "step": 141025 }, { "epoch": 1.39, "grad_norm": 8.868620872497559, "learning_rate": 2.7416724137931038e-06, "loss": 0.1677, "step": 141050 }, { "epoch": 1.39, "grad_norm": 3.548945426940918, "learning_rate": 2.741241379310345e-06, "loss": 0.274, "step": 141075 }, { "epoch": 1.39, "grad_norm": 14.213027954101562, "learning_rate": 2.7408103448275863e-06, "loss": 0.1679, "step": 141100 }, { "epoch": 1.39, "grad_norm": 3.912853717803955, "learning_rate": 2.7403793103448277e-06, "loss": 0.3135, "step": 141125 }, { "epoch": 1.39, "grad_norm": 6.6427836418151855, "learning_rate": 2.739948275862069e-06, "loss": 0.1866, "step": 141150 }, { "epoch": 1.39, "grad_norm": 4.6088385581970215, "learning_rate": 2.73951724137931e-06, "loss": 0.2376, "step": 141175 }, { "epoch": 1.39, "grad_norm": 14.575798034667969, "learning_rate": 2.7390862068965517e-06, "loss": 0.1277, "step": 141200 }, { "epoch": 1.39, "grad_norm": 3.683213472366333, "learning_rate": 2.738655172413793e-06, "loss": 0.2678, "step": 141225 }, { "epoch": 1.39, "grad_norm": 7.417291641235352, "learning_rate": 2.738224137931035e-06, "loss": 0.1501, "step": 141250 }, { "epoch": 1.39, "grad_norm": 6.447782039642334, "learning_rate": 2.7377931034482765e-06, "loss": 0.2719, "step": 141275 }, { "epoch": 1.39, "grad_norm": 5.947594165802002, "learning_rate": 2.737362068965517e-06, "loss": 0.128, "step": 141300 }, { "epoch": 1.39, "grad_norm": 3.5697081089019775, "learning_rate": 2.736931034482759e-06, "loss": 0.2721, "step": 141325 }, { "epoch": 1.39, "grad_norm": 10.65772819519043, "learning_rate": 2.7365000000000004e-06, "loss": 0.1587, "step": 141350 }, { "epoch": 1.39, "grad_norm": 5.412086009979248, "learning_rate": 2.736068965517242e-06, "loss": 0.2504, "step": 141375 }, { "epoch": 1.39, "grad_norm": 9.420551300048828, "learning_rate": 2.735637931034483e-06, "loss": 0.1506, "step": 141400 }, { "epoch": 1.39, "grad_norm": 3.379650115966797, "learning_rate": 2.7352068965517243e-06, "loss": 0.2398, "step": 141425 }, { "epoch": 1.39, "grad_norm": 9.090912818908691, "learning_rate": 2.734775862068966e-06, "loss": 0.1864, "step": 141450 }, { "epoch": 1.39, "grad_norm": 4.223816394805908, "learning_rate": 2.7343448275862073e-06, "loss": 0.2946, "step": 141475 }, { "epoch": 1.39, "grad_norm": 8.368561744689941, "learning_rate": 2.7339137931034487e-06, "loss": 0.1298, "step": 141500 }, { "epoch": 1.39, "grad_norm": 3.759185552597046, "learning_rate": 2.7334827586206897e-06, "loss": 0.2577, "step": 141525 }, { "epoch": 1.39, "grad_norm": 9.770045280456543, "learning_rate": 2.733051724137931e-06, "loss": 0.1276, "step": 141550 }, { "epoch": 1.39, "grad_norm": 5.441009521484375, "learning_rate": 2.7326206896551727e-06, "loss": 0.2601, "step": 141575 }, { "epoch": 1.39, "grad_norm": 15.396409034729004, "learning_rate": 2.732189655172414e-06, "loss": 0.1523, "step": 141600 }, { "epoch": 1.39, "grad_norm": 4.245835781097412, "learning_rate": 2.731758620689655e-06, "loss": 0.3035, "step": 141625 }, { "epoch": 1.39, "grad_norm": 9.022344589233398, "learning_rate": 2.7313275862068966e-06, "loss": 0.1257, "step": 141650 }, { "epoch": 1.39, "grad_norm": 3.8140647411346436, "learning_rate": 2.730896551724138e-06, "loss": 0.3052, "step": 141675 }, { "epoch": 1.39, "grad_norm": 9.276304244995117, "learning_rate": 2.7304655172413795e-06, "loss": 0.1561, "step": 141700 }, { "epoch": 1.39, "grad_norm": 4.501999378204346, "learning_rate": 2.730034482758621e-06, "loss": 0.2639, "step": 141725 }, { "epoch": 1.39, "grad_norm": 9.7236909866333, "learning_rate": 2.729603448275862e-06, "loss": 0.1544, "step": 141750 }, { "epoch": 1.39, "grad_norm": 5.217606067657471, "learning_rate": 2.7291724137931035e-06, "loss": 0.2773, "step": 141775 }, { "epoch": 1.39, "grad_norm": 6.356215000152588, "learning_rate": 2.728741379310345e-06, "loss": 0.1511, "step": 141800 }, { "epoch": 1.39, "grad_norm": 4.050751686096191, "learning_rate": 2.728310344827587e-06, "loss": 0.2731, "step": 141825 }, { "epoch": 1.4, "grad_norm": 7.870367050170898, "learning_rate": 2.7278793103448274e-06, "loss": 0.1479, "step": 141850 }, { "epoch": 1.4, "grad_norm": 4.864058971405029, "learning_rate": 2.727448275862069e-06, "loss": 0.3272, "step": 141875 }, { "epoch": 1.4, "grad_norm": 7.582915306091309, "learning_rate": 2.7270172413793107e-06, "loss": 0.1675, "step": 141900 }, { "epoch": 1.4, "grad_norm": 5.287666320800781, "learning_rate": 2.726586206896552e-06, "loss": 0.3242, "step": 141925 }, { "epoch": 1.4, "grad_norm": 7.833051681518555, "learning_rate": 2.7261551724137937e-06, "loss": 0.1358, "step": 141950 }, { "epoch": 1.4, "grad_norm": 3.856572389602661, "learning_rate": 2.7257413793103447e-06, "loss": 0.2627, "step": 141975 }, { "epoch": 1.4, "grad_norm": 6.507933616638184, "learning_rate": 2.7253103448275866e-06, "loss": 0.131, "step": 142000 }, { "epoch": 1.4, "grad_norm": 5.673619270324707, "learning_rate": 2.724879310344828e-06, "loss": 0.2576, "step": 142025 }, { "epoch": 1.4, "grad_norm": 8.866215705871582, "learning_rate": 2.7244482758620695e-06, "loss": 0.1396, "step": 142050 }, { "epoch": 1.4, "grad_norm": 7.222060680389404, "learning_rate": 2.7240172413793105e-06, "loss": 0.3255, "step": 142075 }, { "epoch": 1.4, "grad_norm": 9.264519691467285, "learning_rate": 2.723586206896552e-06, "loss": 0.1271, "step": 142100 }, { "epoch": 1.4, "grad_norm": 4.938117980957031, "learning_rate": 2.7231551724137934e-06, "loss": 0.2734, "step": 142125 }, { "epoch": 1.4, "grad_norm": 7.534049034118652, "learning_rate": 2.722724137931035e-06, "loss": 0.1462, "step": 142150 }, { "epoch": 1.4, "grad_norm": 3.6654255390167236, "learning_rate": 2.7222931034482763e-06, "loss": 0.2669, "step": 142175 }, { "epoch": 1.4, "grad_norm": 12.585051536560059, "learning_rate": 2.7218620689655174e-06, "loss": 0.1405, "step": 142200 }, { "epoch": 1.4, "grad_norm": 4.457161903381348, "learning_rate": 2.721431034482759e-06, "loss": 0.3253, "step": 142225 }, { "epoch": 1.4, "grad_norm": 9.21873664855957, "learning_rate": 2.7210000000000003e-06, "loss": 0.1497, "step": 142250 }, { "epoch": 1.4, "grad_norm": 3.6092758178710938, "learning_rate": 2.7205689655172417e-06, "loss": 0.2812, "step": 142275 }, { "epoch": 1.4, "grad_norm": 8.885229110717773, "learning_rate": 2.7201379310344828e-06, "loss": 0.1399, "step": 142300 }, { "epoch": 1.4, "grad_norm": 6.296778202056885, "learning_rate": 2.7197068965517242e-06, "loss": 0.2744, "step": 142325 }, { "epoch": 1.4, "grad_norm": 9.413312911987305, "learning_rate": 2.7192758620689657e-06, "loss": 0.1195, "step": 142350 }, { "epoch": 1.4, "grad_norm": 4.692013740539551, "learning_rate": 2.718844827586207e-06, "loss": 0.2462, "step": 142375 }, { "epoch": 1.4, "grad_norm": 8.289315223693848, "learning_rate": 2.7184137931034486e-06, "loss": 0.1761, "step": 142400 }, { "epoch": 1.4, "grad_norm": 4.254528522491455, "learning_rate": 2.7179827586206896e-06, "loss": 0.2766, "step": 142425 }, { "epoch": 1.4, "grad_norm": 6.3074116706848145, "learning_rate": 2.717551724137931e-06, "loss": 0.1559, "step": 142450 }, { "epoch": 1.4, "grad_norm": 4.759227275848389, "learning_rate": 2.7171206896551726e-06, "loss": 0.2868, "step": 142475 }, { "epoch": 1.4, "grad_norm": 10.31769847869873, "learning_rate": 2.7166896551724144e-06, "loss": 0.1548, "step": 142500 }, { "epoch": 1.4, "grad_norm": 5.479390621185303, "learning_rate": 2.716258620689655e-06, "loss": 0.2805, "step": 142525 }, { "epoch": 1.4, "grad_norm": 7.612848281860352, "learning_rate": 2.7158275862068965e-06, "loss": 0.1522, "step": 142550 }, { "epoch": 1.4, "grad_norm": 5.274345874786377, "learning_rate": 2.7153965517241384e-06, "loss": 0.322, "step": 142575 }, { "epoch": 1.4, "grad_norm": 6.919527053833008, "learning_rate": 2.71496551724138e-06, "loss": 0.1306, "step": 142600 }, { "epoch": 1.4, "grad_norm": 3.4959895610809326, "learning_rate": 2.7145344827586204e-06, "loss": 0.2741, "step": 142625 }, { "epoch": 1.4, "grad_norm": 7.908224582672119, "learning_rate": 2.7141034482758623e-06, "loss": 0.1355, "step": 142650 }, { "epoch": 1.4, "grad_norm": 4.037841320037842, "learning_rate": 2.7136724137931038e-06, "loss": 0.238, "step": 142675 }, { "epoch": 1.4, "grad_norm": 18.070241928100586, "learning_rate": 2.7132413793103452e-06, "loss": 0.1727, "step": 142700 }, { "epoch": 1.4, "grad_norm": 4.274970531463623, "learning_rate": 2.7128103448275867e-06, "loss": 0.2693, "step": 142725 }, { "epoch": 1.4, "grad_norm": 9.666197776794434, "learning_rate": 2.7123793103448277e-06, "loss": 0.163, "step": 142750 }, { "epoch": 1.4, "grad_norm": 3.2564380168914795, "learning_rate": 2.711948275862069e-06, "loss": 0.3179, "step": 142775 }, { "epoch": 1.4, "grad_norm": 14.527304649353027, "learning_rate": 2.7115172413793106e-06, "loss": 0.1249, "step": 142800 }, { "epoch": 1.4, "grad_norm": 5.906797885894775, "learning_rate": 2.711086206896552e-06, "loss": 0.2655, "step": 142825 }, { "epoch": 1.4, "grad_norm": 15.47893238067627, "learning_rate": 2.710655172413793e-06, "loss": 0.1449, "step": 142850 }, { "epoch": 1.41, "grad_norm": 4.924330711364746, "learning_rate": 2.7102241379310346e-06, "loss": 0.3151, "step": 142875 }, { "epoch": 1.41, "grad_norm": 4.816843032836914, "learning_rate": 2.709793103448276e-06, "loss": 0.1402, "step": 142900 }, { "epoch": 1.41, "grad_norm": 4.314466953277588, "learning_rate": 2.7093620689655175e-06, "loss": 0.2254, "step": 142925 }, { "epoch": 1.41, "grad_norm": 11.736096382141113, "learning_rate": 2.708931034482759e-06, "loss": 0.155, "step": 142950 }, { "epoch": 1.41, "grad_norm": 4.964182376861572, "learning_rate": 2.7085e-06, "loss": 0.2829, "step": 142975 }, { "epoch": 1.41, "grad_norm": 10.532227516174316, "learning_rate": 2.7080689655172414e-06, "loss": 0.1269, "step": 143000 }, { "epoch": 1.41, "grad_norm": 4.604647159576416, "learning_rate": 2.707637931034483e-06, "loss": 0.2926, "step": 143025 }, { "epoch": 1.41, "grad_norm": 5.644705772399902, "learning_rate": 2.7072068965517244e-06, "loss": 0.1763, "step": 143050 }, { "epoch": 1.41, "grad_norm": 7.041942119598389, "learning_rate": 2.7067758620689654e-06, "loss": 0.3053, "step": 143075 }, { "epoch": 1.41, "grad_norm": 10.275882720947266, "learning_rate": 2.706344827586207e-06, "loss": 0.1423, "step": 143100 }, { "epoch": 1.41, "grad_norm": 6.513019561767578, "learning_rate": 2.7059137931034483e-06, "loss": 0.2767, "step": 143125 }, { "epoch": 1.41, "grad_norm": 4.905014991760254, "learning_rate": 2.70548275862069e-06, "loss": 0.1516, "step": 143150 }, { "epoch": 1.41, "grad_norm": 4.248928546905518, "learning_rate": 2.7050517241379316e-06, "loss": 0.2683, "step": 143175 }, { "epoch": 1.41, "grad_norm": 12.597599983215332, "learning_rate": 2.7046206896551722e-06, "loss": 0.1745, "step": 143200 }, { "epoch": 1.41, "grad_norm": 3.896413564682007, "learning_rate": 2.704189655172414e-06, "loss": 0.2677, "step": 143225 }, { "epoch": 1.41, "grad_norm": 11.10087776184082, "learning_rate": 2.7037586206896556e-06, "loss": 0.1288, "step": 143250 }, { "epoch": 1.41, "grad_norm": 5.617203235626221, "learning_rate": 2.703327586206897e-06, "loss": 0.32, "step": 143275 }, { "epoch": 1.41, "grad_norm": 13.215171813964844, "learning_rate": 2.702896551724138e-06, "loss": 0.171, "step": 143300 }, { "epoch": 1.41, "grad_norm": 4.849651336669922, "learning_rate": 2.7024655172413795e-06, "loss": 0.2971, "step": 143325 }, { "epoch": 1.41, "grad_norm": 9.629617691040039, "learning_rate": 2.702034482758621e-06, "loss": 0.1368, "step": 143350 }, { "epoch": 1.41, "grad_norm": 3.853156328201294, "learning_rate": 2.7016034482758624e-06, "loss": 0.2541, "step": 143375 }, { "epoch": 1.41, "grad_norm": 5.96038818359375, "learning_rate": 2.701172413793104e-06, "loss": 0.1438, "step": 143400 }, { "epoch": 1.41, "grad_norm": 3.745218276977539, "learning_rate": 2.700741379310345e-06, "loss": 0.3126, "step": 143425 }, { "epoch": 1.41, "grad_norm": 12.898859024047852, "learning_rate": 2.7003103448275864e-06, "loss": 0.1441, "step": 143450 }, { "epoch": 1.41, "grad_norm": 4.457504749298096, "learning_rate": 2.699879310344828e-06, "loss": 0.2643, "step": 143475 }, { "epoch": 1.41, "grad_norm": 7.893121719360352, "learning_rate": 2.6994482758620693e-06, "loss": 0.1209, "step": 143500 }, { "epoch": 1.41, "grad_norm": 3.577171802520752, "learning_rate": 2.6990172413793103e-06, "loss": 0.2759, "step": 143525 }, { "epoch": 1.41, "grad_norm": 8.817961692810059, "learning_rate": 2.698586206896552e-06, "loss": 0.1648, "step": 143550 }, { "epoch": 1.41, "grad_norm": 12.989870071411133, "learning_rate": 2.6981551724137932e-06, "loss": 0.2665, "step": 143575 }, { "epoch": 1.41, "grad_norm": 12.333386421203613, "learning_rate": 2.6977241379310347e-06, "loss": 0.1401, "step": 143600 }, { "epoch": 1.41, "grad_norm": 4.823681831359863, "learning_rate": 2.697293103448276e-06, "loss": 0.2882, "step": 143625 }, { "epoch": 1.41, "grad_norm": 10.398364067077637, "learning_rate": 2.696862068965517e-06, "loss": 0.1802, "step": 143650 }, { "epoch": 1.41, "grad_norm": 4.350690841674805, "learning_rate": 2.6964310344827587e-06, "loss": 0.2603, "step": 143675 }, { "epoch": 1.41, "grad_norm": 5.888818740844727, "learning_rate": 2.696e-06, "loss": 0.1259, "step": 143700 }, { "epoch": 1.41, "grad_norm": 12.983704566955566, "learning_rate": 2.695568965517242e-06, "loss": 0.2976, "step": 143725 }, { "epoch": 1.41, "grad_norm": 9.894708633422852, "learning_rate": 2.6951379310344826e-06, "loss": 0.1534, "step": 143750 }, { "epoch": 1.41, "grad_norm": 4.161340236663818, "learning_rate": 2.694706896551724e-06, "loss": 0.2383, "step": 143775 }, { "epoch": 1.41, "grad_norm": 7.995719909667969, "learning_rate": 2.694275862068966e-06, "loss": 0.1415, "step": 143800 }, { "epoch": 1.41, "grad_norm": 5.157273292541504, "learning_rate": 2.6938448275862074e-06, "loss": 0.3634, "step": 143825 }, { "epoch": 1.41, "grad_norm": 9.271145820617676, "learning_rate": 2.693413793103449e-06, "loss": 0.1743, "step": 143850 }, { "epoch": 1.42, "grad_norm": 3.9513862133026123, "learning_rate": 2.69298275862069e-06, "loss": 0.2578, "step": 143875 }, { "epoch": 1.42, "grad_norm": 2.3214869499206543, "learning_rate": 2.6925517241379313e-06, "loss": 0.1478, "step": 143900 }, { "epoch": 1.42, "grad_norm": 5.583293437957764, "learning_rate": 2.692120689655173e-06, "loss": 0.2395, "step": 143925 }, { "epoch": 1.42, "grad_norm": 10.190409660339355, "learning_rate": 2.6916896551724142e-06, "loss": 0.1891, "step": 143950 }, { "epoch": 1.42, "grad_norm": 4.477851390838623, "learning_rate": 2.6912586206896553e-06, "loss": 0.3094, "step": 143975 }, { "epoch": 1.42, "grad_norm": 7.0756611824035645, "learning_rate": 2.6908275862068967e-06, "loss": 0.1519, "step": 144000 }, { "epoch": 1.42, "grad_norm": 3.9382524490356445, "learning_rate": 2.6904137931034486e-06, "loss": 0.2497, "step": 144025 }, { "epoch": 1.42, "grad_norm": 22.306934356689453, "learning_rate": 2.68998275862069e-06, "loss": 0.1343, "step": 144050 }, { "epoch": 1.42, "grad_norm": 4.06355094909668, "learning_rate": 2.6895517241379315e-06, "loss": 0.3181, "step": 144075 }, { "epoch": 1.42, "grad_norm": 8.541732788085938, "learning_rate": 2.6891206896551726e-06, "loss": 0.179, "step": 144100 }, { "epoch": 1.42, "grad_norm": 4.9263482093811035, "learning_rate": 2.688689655172414e-06, "loss": 0.2947, "step": 144125 }, { "epoch": 1.42, "grad_norm": 8.736109733581543, "learning_rate": 2.6882586206896555e-06, "loss": 0.1188, "step": 144150 }, { "epoch": 1.42, "grad_norm": 4.204532623291016, "learning_rate": 2.687827586206897e-06, "loss": 0.2972, "step": 144175 }, { "epoch": 1.42, "grad_norm": 5.848293781280518, "learning_rate": 2.687396551724138e-06, "loss": 0.1442, "step": 144200 }, { "epoch": 1.42, "grad_norm": 4.0291666984558105, "learning_rate": 2.6869655172413794e-06, "loss": 0.2788, "step": 144225 }, { "epoch": 1.42, "grad_norm": 6.904865741729736, "learning_rate": 2.686534482758621e-06, "loss": 0.1424, "step": 144250 }, { "epoch": 1.42, "grad_norm": 4.616687774658203, "learning_rate": 2.6861034482758623e-06, "loss": 0.3202, "step": 144275 }, { "epoch": 1.42, "grad_norm": 10.314496994018555, "learning_rate": 2.6856724137931034e-06, "loss": 0.1668, "step": 144300 }, { "epoch": 1.42, "grad_norm": 3.4262328147888184, "learning_rate": 2.685241379310345e-06, "loss": 0.2907, "step": 144325 }, { "epoch": 1.42, "grad_norm": 7.961042881011963, "learning_rate": 2.6848103448275863e-06, "loss": 0.1326, "step": 144350 }, { "epoch": 1.42, "grad_norm": 3.938000440597534, "learning_rate": 2.6843793103448277e-06, "loss": 0.2744, "step": 144375 }, { "epoch": 1.42, "grad_norm": 6.6802167892456055, "learning_rate": 2.683948275862069e-06, "loss": 0.1313, "step": 144400 }, { "epoch": 1.42, "grad_norm": 5.84165620803833, "learning_rate": 2.6835172413793102e-06, "loss": 0.2641, "step": 144425 }, { "epoch": 1.42, "grad_norm": 8.802838325500488, "learning_rate": 2.6830862068965517e-06, "loss": 0.1627, "step": 144450 }, { "epoch": 1.42, "grad_norm": 4.646279335021973, "learning_rate": 2.6826551724137936e-06, "loss": 0.2631, "step": 144475 }, { "epoch": 1.42, "grad_norm": 8.240520477294922, "learning_rate": 2.682224137931035e-06, "loss": 0.1512, "step": 144500 }, { "epoch": 1.42, "grad_norm": 5.395285606384277, "learning_rate": 2.6817931034482756e-06, "loss": 0.2852, "step": 144525 }, { "epoch": 1.42, "grad_norm": 7.395756244659424, "learning_rate": 2.6813620689655175e-06, "loss": 0.141, "step": 144550 }, { "epoch": 1.42, "grad_norm": 7.575732231140137, "learning_rate": 2.680931034482759e-06, "loss": 0.299, "step": 144575 }, { "epoch": 1.42, "grad_norm": 11.368133544921875, "learning_rate": 2.6805000000000004e-06, "loss": 0.1404, "step": 144600 }, { "epoch": 1.42, "grad_norm": 3.2246153354644775, "learning_rate": 2.680068965517242e-06, "loss": 0.3224, "step": 144625 }, { "epoch": 1.42, "grad_norm": 5.089075088500977, "learning_rate": 2.679637931034483e-06, "loss": 0.1461, "step": 144650 }, { "epoch": 1.42, "grad_norm": 3.9709856510162354, "learning_rate": 2.679224137931035e-06, "loss": 0.3149, "step": 144675 }, { "epoch": 1.42, "grad_norm": 7.171965599060059, "learning_rate": 2.6787931034482763e-06, "loss": 0.1518, "step": 144700 }, { "epoch": 1.42, "grad_norm": 5.079217433929443, "learning_rate": 2.6783620689655177e-06, "loss": 0.2766, "step": 144725 }, { "epoch": 1.42, "grad_norm": 10.989347457885742, "learning_rate": 2.6779310344827587e-06, "loss": 0.1471, "step": 144750 }, { "epoch": 1.42, "grad_norm": 3.931748867034912, "learning_rate": 2.6775e-06, "loss": 0.3213, "step": 144775 }, { "epoch": 1.42, "grad_norm": 3.041590929031372, "learning_rate": 2.6770689655172417e-06, "loss": 0.1461, "step": 144800 }, { "epoch": 1.42, "grad_norm": 4.077139377593994, "learning_rate": 2.676637931034483e-06, "loss": 0.2991, "step": 144825 }, { "epoch": 1.42, "grad_norm": 8.991887092590332, "learning_rate": 2.6762068965517246e-06, "loss": 0.1638, "step": 144850 }, { "epoch": 1.42, "grad_norm": 3.860403537750244, "learning_rate": 2.6757758620689656e-06, "loss": 0.2975, "step": 144875 }, { "epoch": 1.43, "grad_norm": 7.530970096588135, "learning_rate": 2.675344827586207e-06, "loss": 0.1277, "step": 144900 }, { "epoch": 1.43, "grad_norm": 4.226709842681885, "learning_rate": 2.6749137931034485e-06, "loss": 0.3026, "step": 144925 }, { "epoch": 1.43, "grad_norm": 8.721722602844238, "learning_rate": 2.67448275862069e-06, "loss": 0.1396, "step": 144950 }, { "epoch": 1.43, "grad_norm": 5.486352920532227, "learning_rate": 2.674051724137931e-06, "loss": 0.2324, "step": 144975 }, { "epoch": 1.43, "grad_norm": 8.025662422180176, "learning_rate": 2.6736206896551725e-06, "loss": 0.1221, "step": 145000 }, { "epoch": 1.43, "grad_norm": 3.7867202758789062, "learning_rate": 2.673189655172414e-06, "loss": 0.3433, "step": 145025 }, { "epoch": 1.43, "grad_norm": 4.753378868103027, "learning_rate": 2.6727586206896554e-06, "loss": 0.1444, "step": 145050 }, { "epoch": 1.43, "grad_norm": 4.426959991455078, "learning_rate": 2.672327586206897e-06, "loss": 0.2374, "step": 145075 }, { "epoch": 1.43, "grad_norm": 8.413296699523926, "learning_rate": 2.671896551724138e-06, "loss": 0.1505, "step": 145100 }, { "epoch": 1.43, "grad_norm": 3.5787503719329834, "learning_rate": 2.6714655172413793e-06, "loss": 0.3354, "step": 145125 }, { "epoch": 1.43, "grad_norm": 3.904345750808716, "learning_rate": 2.6710344827586208e-06, "loss": 0.1369, "step": 145150 }, { "epoch": 1.43, "grad_norm": 4.185202121734619, "learning_rate": 2.6706034482758627e-06, "loss": 0.2533, "step": 145175 }, { "epoch": 1.43, "grad_norm": 9.193115234375, "learning_rate": 2.6701724137931033e-06, "loss": 0.1279, "step": 145200 }, { "epoch": 1.43, "grad_norm": 5.7937469482421875, "learning_rate": 2.669741379310345e-06, "loss": 0.3248, "step": 145225 }, { "epoch": 1.43, "grad_norm": 7.6860575675964355, "learning_rate": 2.6693103448275866e-06, "loss": 0.1728, "step": 145250 }, { "epoch": 1.43, "grad_norm": 4.493420124053955, "learning_rate": 2.668879310344828e-06, "loss": 0.2747, "step": 145275 }, { "epoch": 1.43, "grad_norm": 10.679661750793457, "learning_rate": 2.6684482758620695e-06, "loss": 0.1671, "step": 145300 }, { "epoch": 1.43, "grad_norm": 4.731395244598389, "learning_rate": 2.6680172413793105e-06, "loss": 0.2686, "step": 145325 }, { "epoch": 1.43, "grad_norm": 3.9049768447875977, "learning_rate": 2.667586206896552e-06, "loss": 0.1681, "step": 145350 }, { "epoch": 1.43, "grad_norm": 5.939428806304932, "learning_rate": 2.6671551724137935e-06, "loss": 0.2853, "step": 145375 }, { "epoch": 1.43, "grad_norm": 9.455083847045898, "learning_rate": 2.666724137931035e-06, "loss": 0.1332, "step": 145400 }, { "epoch": 1.43, "grad_norm": 3.917623996734619, "learning_rate": 2.666293103448276e-06, "loss": 0.2453, "step": 145425 }, { "epoch": 1.43, "grad_norm": 11.704837799072266, "learning_rate": 2.6658620689655174e-06, "loss": 0.1729, "step": 145450 }, { "epoch": 1.43, "grad_norm": 4.386477470397949, "learning_rate": 2.665431034482759e-06, "loss": 0.2473, "step": 145475 }, { "epoch": 1.43, "grad_norm": 10.414874076843262, "learning_rate": 2.6650000000000003e-06, "loss": 0.1397, "step": 145500 }, { "epoch": 1.43, "grad_norm": 5.365670680999756, "learning_rate": 2.6645689655172418e-06, "loss": 0.293, "step": 145525 }, { "epoch": 1.43, "grad_norm": 8.03165340423584, "learning_rate": 2.664137931034483e-06, "loss": 0.1397, "step": 145550 }, { "epoch": 1.43, "grad_norm": 5.188912868499756, "learning_rate": 2.6637068965517243e-06, "loss": 0.3352, "step": 145575 }, { "epoch": 1.43, "grad_norm": 7.47743034362793, "learning_rate": 2.6632758620689657e-06, "loss": 0.1769, "step": 145600 }, { "epoch": 1.43, "grad_norm": 3.923269748687744, "learning_rate": 2.662844827586207e-06, "loss": 0.287, "step": 145625 }, { "epoch": 1.43, "grad_norm": 6.485673904418945, "learning_rate": 2.662413793103448e-06, "loss": 0.1386, "step": 145650 }, { "epoch": 1.43, "grad_norm": 3.594003438949585, "learning_rate": 2.6619827586206897e-06, "loss": 0.2363, "step": 145675 }, { "epoch": 1.43, "grad_norm": 8.777582168579102, "learning_rate": 2.661551724137931e-06, "loss": 0.1464, "step": 145700 }, { "epoch": 1.43, "grad_norm": 3.5580763816833496, "learning_rate": 2.6611206896551726e-06, "loss": 0.262, "step": 145725 }, { "epoch": 1.43, "grad_norm": 5.091268062591553, "learning_rate": 2.6606896551724145e-06, "loss": 0.1422, "step": 145750 }, { "epoch": 1.43, "grad_norm": 5.762702941894531, "learning_rate": 2.660258620689655e-06, "loss": 0.3233, "step": 145775 }, { "epoch": 1.43, "grad_norm": 18.802532196044922, "learning_rate": 2.659827586206897e-06, "loss": 0.1421, "step": 145800 }, { "epoch": 1.43, "grad_norm": 5.846185207366943, "learning_rate": 2.6593965517241384e-06, "loss": 0.332, "step": 145825 }, { "epoch": 1.43, "grad_norm": 7.765486240386963, "learning_rate": 2.65896551724138e-06, "loss": 0.1494, "step": 145850 }, { "epoch": 1.43, "grad_norm": 5.857262134552002, "learning_rate": 2.658534482758621e-06, "loss": 0.3424, "step": 145875 }, { "epoch": 1.43, "grad_norm": 7.1266632080078125, "learning_rate": 2.6581034482758624e-06, "loss": 0.1752, "step": 145900 }, { "epoch": 1.44, "grad_norm": 3.154541492462158, "learning_rate": 2.657672413793104e-06, "loss": 0.2427, "step": 145925 }, { "epoch": 1.44, "grad_norm": 5.055972576141357, "learning_rate": 2.6572413793103453e-06, "loss": 0.1174, "step": 145950 }, { "epoch": 1.44, "grad_norm": 4.670867919921875, "learning_rate": 2.6568103448275863e-06, "loss": 0.243, "step": 145975 }, { "epoch": 1.44, "grad_norm": 9.68189811706543, "learning_rate": 2.6563793103448278e-06, "loss": 0.1379, "step": 146000 }, { "epoch": 1.44, "grad_norm": 4.900539875030518, "learning_rate": 2.655948275862069e-06, "loss": 0.2923, "step": 146025 }, { "epoch": 1.44, "grad_norm": 9.463906288146973, "learning_rate": 2.6555172413793107e-06, "loss": 0.1985, "step": 146050 }, { "epoch": 1.44, "grad_norm": 4.894120693206787, "learning_rate": 2.655086206896552e-06, "loss": 0.302, "step": 146075 }, { "epoch": 1.44, "grad_norm": 2.1843433380126953, "learning_rate": 2.654655172413793e-06, "loss": 0.1469, "step": 146100 }, { "epoch": 1.44, "grad_norm": 5.010056495666504, "learning_rate": 2.6542241379310346e-06, "loss": 0.2661, "step": 146125 }, { "epoch": 1.44, "grad_norm": 17.95389747619629, "learning_rate": 2.653793103448276e-06, "loss": 0.1485, "step": 146150 }, { "epoch": 1.44, "grad_norm": 5.5630292892456055, "learning_rate": 2.6533620689655175e-06, "loss": 0.2843, "step": 146175 }, { "epoch": 1.44, "grad_norm": 11.800280570983887, "learning_rate": 2.6529310344827586e-06, "loss": 0.1382, "step": 146200 }, { "epoch": 1.44, "grad_norm": 7.132488250732422, "learning_rate": 2.6525e-06, "loss": 0.2955, "step": 146225 }, { "epoch": 1.44, "grad_norm": 8.813624382019043, "learning_rate": 2.6520689655172415e-06, "loss": 0.1459, "step": 146250 }, { "epoch": 1.44, "grad_norm": 6.781558513641357, "learning_rate": 2.651637931034483e-06, "loss": 0.3524, "step": 146275 }, { "epoch": 1.44, "grad_norm": 11.670299530029297, "learning_rate": 2.6512068965517244e-06, "loss": 0.138, "step": 146300 }, { "epoch": 1.44, "grad_norm": 5.336037635803223, "learning_rate": 2.6507758620689654e-06, "loss": 0.2831, "step": 146325 }, { "epoch": 1.44, "grad_norm": 6.869719982147217, "learning_rate": 2.650344827586207e-06, "loss": 0.1588, "step": 146350 }, { "epoch": 1.44, "grad_norm": 4.082289218902588, "learning_rate": 2.6499137931034483e-06, "loss": 0.2494, "step": 146375 }, { "epoch": 1.44, "grad_norm": 11.030773162841797, "learning_rate": 2.64948275862069e-06, "loss": 0.1546, "step": 146400 }, { "epoch": 1.44, "grad_norm": 4.401002883911133, "learning_rate": 2.649051724137931e-06, "loss": 0.2784, "step": 146425 }, { "epoch": 1.44, "grad_norm": 10.135530471801758, "learning_rate": 2.6486206896551727e-06, "loss": 0.1754, "step": 146450 }, { "epoch": 1.44, "grad_norm": 5.358943462371826, "learning_rate": 2.648189655172414e-06, "loss": 0.2745, "step": 146475 }, { "epoch": 1.44, "grad_norm": 8.319809913635254, "learning_rate": 2.6477586206896556e-06, "loss": 0.1454, "step": 146500 }, { "epoch": 1.44, "grad_norm": 2.8521227836608887, "learning_rate": 2.647327586206897e-06, "loss": 0.3009, "step": 146525 }, { "epoch": 1.44, "grad_norm": 6.619643688201904, "learning_rate": 2.646896551724138e-06, "loss": 0.1365, "step": 146550 }, { "epoch": 1.44, "grad_norm": 4.53214168548584, "learning_rate": 2.6464655172413796e-06, "loss": 0.2648, "step": 146575 }, { "epoch": 1.44, "grad_norm": 14.909058570861816, "learning_rate": 2.646034482758621e-06, "loss": 0.1984, "step": 146600 }, { "epoch": 1.44, "grad_norm": 4.450878620147705, "learning_rate": 2.6456034482758625e-06, "loss": 0.2734, "step": 146625 }, { "epoch": 1.44, "grad_norm": 6.663828372955322, "learning_rate": 2.6451724137931035e-06, "loss": 0.1657, "step": 146650 }, { "epoch": 1.44, "grad_norm": 4.187229633331299, "learning_rate": 2.644741379310345e-06, "loss": 0.2778, "step": 146675 }, { "epoch": 1.44, "grad_norm": 6.04264497756958, "learning_rate": 2.6443103448275864e-06, "loss": 0.1323, "step": 146700 }, { "epoch": 1.44, "grad_norm": 5.23392915725708, "learning_rate": 2.643879310344828e-06, "loss": 0.2496, "step": 146725 }, { "epoch": 1.44, "grad_norm": 10.796889305114746, "learning_rate": 2.6434482758620693e-06, "loss": 0.1536, "step": 146750 }, { "epoch": 1.44, "grad_norm": 9.629984855651855, "learning_rate": 2.6430172413793104e-06, "loss": 0.2985, "step": 146775 }, { "epoch": 1.44, "grad_norm": 10.283785820007324, "learning_rate": 2.642586206896552e-06, "loss": 0.1387, "step": 146800 }, { "epoch": 1.44, "grad_norm": 4.57559871673584, "learning_rate": 2.6421551724137933e-06, "loss": 0.3074, "step": 146825 }, { "epoch": 1.44, "grad_norm": 4.425680637359619, "learning_rate": 2.6417241379310347e-06, "loss": 0.1307, "step": 146850 }, { "epoch": 1.44, "grad_norm": 5.159614562988281, "learning_rate": 2.6412931034482758e-06, "loss": 0.2444, "step": 146875 }, { "epoch": 1.44, "grad_norm": 13.039280891418457, "learning_rate": 2.6408620689655172e-06, "loss": 0.1513, "step": 146900 }, { "epoch": 1.45, "grad_norm": 5.501970291137695, "learning_rate": 2.6404310344827587e-06, "loss": 0.2689, "step": 146925 }, { "epoch": 1.45, "grad_norm": 5.915893077850342, "learning_rate": 2.64e-06, "loss": 0.1298, "step": 146950 }, { "epoch": 1.45, "grad_norm": 14.133424758911133, "learning_rate": 2.639568965517242e-06, "loss": 0.2677, "step": 146975 }, { "epoch": 1.45, "grad_norm": 10.174654960632324, "learning_rate": 2.6391379310344826e-06, "loss": 0.1376, "step": 147000 }, { "epoch": 1.45, "grad_norm": 4.174671649932861, "learning_rate": 2.6387068965517245e-06, "loss": 0.2845, "step": 147025 }, { "epoch": 1.45, "grad_norm": 8.124462127685547, "learning_rate": 2.638275862068966e-06, "loss": 0.1604, "step": 147050 }, { "epoch": 1.45, "grad_norm": 4.816062927246094, "learning_rate": 2.6378448275862074e-06, "loss": 0.3477, "step": 147075 }, { "epoch": 1.45, "grad_norm": 10.138089179992676, "learning_rate": 2.6374137931034484e-06, "loss": 0.1727, "step": 147100 }, { "epoch": 1.45, "grad_norm": 6.020908832550049, "learning_rate": 2.63698275862069e-06, "loss": 0.2792, "step": 147125 }, { "epoch": 1.45, "grad_norm": 13.233007431030273, "learning_rate": 2.6365517241379314e-06, "loss": 0.1645, "step": 147150 }, { "epoch": 1.45, "grad_norm": 4.776684761047363, "learning_rate": 2.636120689655173e-06, "loss": 0.2543, "step": 147175 }, { "epoch": 1.45, "grad_norm": 14.491292953491211, "learning_rate": 2.6356896551724143e-06, "loss": 0.1263, "step": 147200 }, { "epoch": 1.45, "grad_norm": 5.4883856773376465, "learning_rate": 2.6352586206896553e-06, "loss": 0.2651, "step": 147225 }, { "epoch": 1.45, "grad_norm": 11.888299942016602, "learning_rate": 2.6348275862068968e-06, "loss": 0.1573, "step": 147250 }, { "epoch": 1.45, "grad_norm": 4.593320846557617, "learning_rate": 2.6343965517241382e-06, "loss": 0.3238, "step": 147275 }, { "epoch": 1.45, "grad_norm": 4.608426570892334, "learning_rate": 2.6339655172413797e-06, "loss": 0.1658, "step": 147300 }, { "epoch": 1.45, "grad_norm": 4.088407039642334, "learning_rate": 2.6335344827586207e-06, "loss": 0.2861, "step": 147325 }, { "epoch": 1.45, "grad_norm": 3.767559766769409, "learning_rate": 2.633103448275862e-06, "loss": 0.1113, "step": 147350 }, { "epoch": 1.45, "grad_norm": 3.7619965076446533, "learning_rate": 2.6326724137931036e-06, "loss": 0.213, "step": 147375 }, { "epoch": 1.45, "grad_norm": 9.57185173034668, "learning_rate": 2.632241379310345e-06, "loss": 0.1191, "step": 147400 }, { "epoch": 1.45, "grad_norm": 5.937933444976807, "learning_rate": 2.6318103448275865e-06, "loss": 0.2858, "step": 147425 }, { "epoch": 1.45, "grad_norm": 5.255437850952148, "learning_rate": 2.6313793103448276e-06, "loss": 0.1722, "step": 147450 }, { "epoch": 1.45, "grad_norm": 3.289064645767212, "learning_rate": 2.630948275862069e-06, "loss": 0.2737, "step": 147475 }, { "epoch": 1.45, "grad_norm": 12.626593589782715, "learning_rate": 2.6305172413793105e-06, "loss": 0.1725, "step": 147500 }, { "epoch": 1.45, "grad_norm": 5.077467441558838, "learning_rate": 2.630086206896552e-06, "loss": 0.2866, "step": 147525 }, { "epoch": 1.45, "grad_norm": 6.5543718338012695, "learning_rate": 2.629655172413793e-06, "loss": 0.1372, "step": 147550 }, { "epoch": 1.45, "grad_norm": 2.936246395111084, "learning_rate": 2.6292241379310344e-06, "loss": 0.3006, "step": 147575 }, { "epoch": 1.45, "grad_norm": 8.825339317321777, "learning_rate": 2.628793103448276e-06, "loss": 0.1484, "step": 147600 }, { "epoch": 1.45, "grad_norm": 3.433964729309082, "learning_rate": 2.6283620689655178e-06, "loss": 0.2549, "step": 147625 }, { "epoch": 1.45, "grad_norm": 15.392630577087402, "learning_rate": 2.6279310344827592e-06, "loss": 0.1583, "step": 147650 }, { "epoch": 1.45, "grad_norm": 5.863964557647705, "learning_rate": 2.6275000000000003e-06, "loss": 0.2695, "step": 147675 }, { "epoch": 1.45, "grad_norm": 7.934664726257324, "learning_rate": 2.6270689655172417e-06, "loss": 0.1869, "step": 147700 }, { "epoch": 1.45, "grad_norm": 6.4542036056518555, "learning_rate": 2.626637931034483e-06, "loss": 0.296, "step": 147725 }, { "epoch": 1.45, "grad_norm": 8.262805938720703, "learning_rate": 2.6262068965517246e-06, "loss": 0.1294, "step": 147750 }, { "epoch": 1.45, "grad_norm": 3.8927857875823975, "learning_rate": 2.6257758620689657e-06, "loss": 0.3116, "step": 147775 }, { "epoch": 1.45, "grad_norm": 4.765081405639648, "learning_rate": 2.625344827586207e-06, "loss": 0.1816, "step": 147800 }, { "epoch": 1.45, "grad_norm": 5.0304856300354, "learning_rate": 2.6249137931034486e-06, "loss": 0.291, "step": 147825 }, { "epoch": 1.45, "grad_norm": 6.549523830413818, "learning_rate": 2.62448275862069e-06, "loss": 0.1492, "step": 147850 }, { "epoch": 1.45, "grad_norm": 4.203779697418213, "learning_rate": 2.6240517241379315e-06, "loss": 0.2988, "step": 147875 }, { "epoch": 1.45, "grad_norm": 6.920319080352783, "learning_rate": 2.6236206896551725e-06, "loss": 0.1343, "step": 147900 }, { "epoch": 1.45, "grad_norm": 5.438080310821533, "learning_rate": 2.623189655172414e-06, "loss": 0.2843, "step": 147925 }, { "epoch": 1.46, "grad_norm": 7.4731597900390625, "learning_rate": 2.6227586206896554e-06, "loss": 0.1335, "step": 147950 }, { "epoch": 1.46, "grad_norm": 4.443714141845703, "learning_rate": 2.622327586206897e-06, "loss": 0.2454, "step": 147975 }, { "epoch": 1.46, "grad_norm": 7.7084503173828125, "learning_rate": 2.621896551724138e-06, "loss": 0.1466, "step": 148000 }, { "epoch": 1.46, "grad_norm": 5.788434982299805, "learning_rate": 2.6214655172413794e-06, "loss": 0.3396, "step": 148025 }, { "epoch": 1.46, "grad_norm": 10.663294792175293, "learning_rate": 2.621034482758621e-06, "loss": 0.1276, "step": 148050 }, { "epoch": 1.46, "grad_norm": 4.620105743408203, "learning_rate": 2.6206034482758623e-06, "loss": 0.2481, "step": 148075 }, { "epoch": 1.46, "grad_norm": 9.97309398651123, "learning_rate": 2.6201724137931033e-06, "loss": 0.153, "step": 148100 }, { "epoch": 1.46, "grad_norm": 5.128307342529297, "learning_rate": 2.6197413793103448e-06, "loss": 0.2983, "step": 148125 }, { "epoch": 1.46, "grad_norm": 0.38090628385543823, "learning_rate": 2.6193103448275862e-06, "loss": 0.1491, "step": 148150 }, { "epoch": 1.46, "grad_norm": 3.725146532058716, "learning_rate": 2.6188793103448277e-06, "loss": 0.2729, "step": 148175 }, { "epoch": 1.46, "grad_norm": 7.91438102722168, "learning_rate": 2.6184482758620696e-06, "loss": 0.1639, "step": 148200 }, { "epoch": 1.46, "grad_norm": 5.439512729644775, "learning_rate": 2.61801724137931e-06, "loss": 0.2906, "step": 148225 }, { "epoch": 1.46, "grad_norm": 12.386322975158691, "learning_rate": 2.617586206896552e-06, "loss": 0.1506, "step": 148250 }, { "epoch": 1.46, "grad_norm": 6.278012275695801, "learning_rate": 2.6171551724137935e-06, "loss": 0.2457, "step": 148275 }, { "epoch": 1.46, "grad_norm": 10.562688827514648, "learning_rate": 2.616724137931035e-06, "loss": 0.1974, "step": 148300 }, { "epoch": 1.46, "grad_norm": 5.937510967254639, "learning_rate": 2.616293103448276e-06, "loss": 0.3064, "step": 148325 }, { "epoch": 1.46, "grad_norm": 8.38988971710205, "learning_rate": 2.6158620689655175e-06, "loss": 0.1201, "step": 148350 }, { "epoch": 1.46, "grad_norm": 4.043553829193115, "learning_rate": 2.615431034482759e-06, "loss": 0.2863, "step": 148375 }, { "epoch": 1.46, "grad_norm": 5.5489726066589355, "learning_rate": 2.6150000000000004e-06, "loss": 0.1356, "step": 148400 }, { "epoch": 1.46, "grad_norm": 5.796104907989502, "learning_rate": 2.614568965517242e-06, "loss": 0.3201, "step": 148425 }, { "epoch": 1.46, "grad_norm": 11.244295120239258, "learning_rate": 2.614137931034483e-06, "loss": 0.1793, "step": 148450 }, { "epoch": 1.46, "grad_norm": 4.381658554077148, "learning_rate": 2.6137068965517243e-06, "loss": 0.3073, "step": 148475 }, { "epoch": 1.46, "grad_norm": 7.593491077423096, "learning_rate": 2.6132758620689658e-06, "loss": 0.1709, "step": 148500 }, { "epoch": 1.46, "grad_norm": 3.6117048263549805, "learning_rate": 2.6128448275862072e-06, "loss": 0.2569, "step": 148525 }, { "epoch": 1.46, "grad_norm": 2.0057148933410645, "learning_rate": 2.6124137931034483e-06, "loss": 0.104, "step": 148550 }, { "epoch": 1.46, "grad_norm": 4.489238262176514, "learning_rate": 2.6119827586206897e-06, "loss": 0.2655, "step": 148575 }, { "epoch": 1.46, "grad_norm": 6.246944904327393, "learning_rate": 2.611551724137931e-06, "loss": 0.1652, "step": 148600 }, { "epoch": 1.46, "grad_norm": 4.173450946807861, "learning_rate": 2.6111206896551726e-06, "loss": 0.28, "step": 148625 }, { "epoch": 1.46, "grad_norm": 9.881457328796387, "learning_rate": 2.610689655172414e-06, "loss": 0.1185, "step": 148650 }, { "epoch": 1.46, "grad_norm": 4.358567714691162, "learning_rate": 2.610258620689655e-06, "loss": 0.269, "step": 148675 }, { "epoch": 1.46, "grad_norm": 9.286378860473633, "learning_rate": 2.6098275862068966e-06, "loss": 0.1775, "step": 148700 }, { "epoch": 1.46, "grad_norm": 6.751312732696533, "learning_rate": 2.6094137931034485e-06, "loss": 0.3532, "step": 148725 }, { "epoch": 1.46, "grad_norm": 5.466105937957764, "learning_rate": 2.60898275862069e-06, "loss": 0.1405, "step": 148750 }, { "epoch": 1.46, "grad_norm": 5.112781047821045, "learning_rate": 2.608551724137931e-06, "loss": 0.2854, "step": 148775 }, { "epoch": 1.46, "grad_norm": 0.09511134773492813, "learning_rate": 2.6081206896551724e-06, "loss": 0.1287, "step": 148800 }, { "epoch": 1.46, "grad_norm": 2.373049020767212, "learning_rate": 2.607689655172414e-06, "loss": 0.2568, "step": 148825 }, { "epoch": 1.46, "grad_norm": 5.907069206237793, "learning_rate": 2.6072586206896553e-06, "loss": 0.1042, "step": 148850 }, { "epoch": 1.46, "grad_norm": 3.3719892501831055, "learning_rate": 2.606827586206897e-06, "loss": 0.2477, "step": 148875 }, { "epoch": 1.46, "grad_norm": 6.32942008972168, "learning_rate": 2.606396551724138e-06, "loss": 0.1107, "step": 148900 }, { "epoch": 1.46, "grad_norm": 3.6535298824310303, "learning_rate": 2.6059655172413793e-06, "loss": 0.2651, "step": 148925 }, { "epoch": 1.46, "grad_norm": 9.099654197692871, "learning_rate": 2.605534482758621e-06, "loss": 0.138, "step": 148950 }, { "epoch": 1.47, "grad_norm": 4.30637264251709, "learning_rate": 2.6051034482758626e-06, "loss": 0.2647, "step": 148975 }, { "epoch": 1.47, "grad_norm": 8.042098045349121, "learning_rate": 2.6046724137931036e-06, "loss": 0.1508, "step": 149000 }, { "epoch": 1.47, "grad_norm": 3.8351640701293945, "learning_rate": 2.604241379310345e-06, "loss": 0.2722, "step": 149025 }, { "epoch": 1.47, "grad_norm": 3.039729356765747, "learning_rate": 2.6038103448275866e-06, "loss": 0.1206, "step": 149050 }, { "epoch": 1.47, "grad_norm": 6.396461009979248, "learning_rate": 2.603379310344828e-06, "loss": 0.2739, "step": 149075 }, { "epoch": 1.47, "grad_norm": 8.613993644714355, "learning_rate": 2.6029482758620695e-06, "loss": 0.1283, "step": 149100 }, { "epoch": 1.47, "grad_norm": 5.864699840545654, "learning_rate": 2.6025172413793105e-06, "loss": 0.2354, "step": 149125 }, { "epoch": 1.47, "grad_norm": 12.844889640808105, "learning_rate": 2.602086206896552e-06, "loss": 0.1516, "step": 149150 }, { "epoch": 1.47, "grad_norm": 4.790891170501709, "learning_rate": 2.6016551724137934e-06, "loss": 0.2628, "step": 149175 }, { "epoch": 1.47, "grad_norm": 12.722443580627441, "learning_rate": 2.601224137931035e-06, "loss": 0.1159, "step": 149200 }, { "epoch": 1.47, "grad_norm": 3.492903232574463, "learning_rate": 2.600793103448276e-06, "loss": 0.2756, "step": 149225 }, { "epoch": 1.47, "grad_norm": 3.5918569564819336, "learning_rate": 2.6003620689655174e-06, "loss": 0.1324, "step": 149250 }, { "epoch": 1.47, "grad_norm": 4.201985836029053, "learning_rate": 2.599931034482759e-06, "loss": 0.2919, "step": 149275 }, { "epoch": 1.47, "grad_norm": 7.143906593322754, "learning_rate": 2.5995000000000003e-06, "loss": 0.1575, "step": 149300 }, { "epoch": 1.47, "grad_norm": 4.719413757324219, "learning_rate": 2.5990689655172417e-06, "loss": 0.2749, "step": 149325 }, { "epoch": 1.47, "grad_norm": 9.759149551391602, "learning_rate": 2.5986379310344828e-06, "loss": 0.1839, "step": 149350 }, { "epoch": 1.47, "grad_norm": 4.65657377243042, "learning_rate": 2.5982068965517242e-06, "loss": 0.2905, "step": 149375 }, { "epoch": 1.47, "grad_norm": 6.843874454498291, "learning_rate": 2.5977758620689657e-06, "loss": 0.1544, "step": 149400 }, { "epoch": 1.47, "grad_norm": 5.3671555519104, "learning_rate": 2.597344827586207e-06, "loss": 0.3176, "step": 149425 }, { "epoch": 1.47, "grad_norm": 10.090563774108887, "learning_rate": 2.596913793103448e-06, "loss": 0.1589, "step": 149450 }, { "epoch": 1.47, "grad_norm": 3.473323106765747, "learning_rate": 2.5964827586206896e-06, "loss": 0.2571, "step": 149475 }, { "epoch": 1.47, "grad_norm": 5.944076061248779, "learning_rate": 2.596051724137931e-06, "loss": 0.1288, "step": 149500 }, { "epoch": 1.47, "grad_norm": 6.48117733001709, "learning_rate": 2.595620689655173e-06, "loss": 0.2701, "step": 149525 }, { "epoch": 1.47, "grad_norm": 12.222833633422852, "learning_rate": 2.5951896551724144e-06, "loss": 0.144, "step": 149550 }, { "epoch": 1.47, "grad_norm": 3.944042205810547, "learning_rate": 2.594758620689655e-06, "loss": 0.2677, "step": 149575 }, { "epoch": 1.47, "grad_norm": 10.847691535949707, "learning_rate": 2.594327586206897e-06, "loss": 0.1658, "step": 149600 }, { "epoch": 1.47, "grad_norm": 2.897994041442871, "learning_rate": 2.5938965517241384e-06, "loss": 0.2464, "step": 149625 }, { "epoch": 1.47, "grad_norm": 11.76009464263916, "learning_rate": 2.59346551724138e-06, "loss": 0.1623, "step": 149650 }, { "epoch": 1.47, "grad_norm": 3.706820487976074, "learning_rate": 2.593034482758621e-06, "loss": 0.2924, "step": 149675 }, { "epoch": 1.47, "grad_norm": 10.358333587646484, "learning_rate": 2.5926034482758623e-06, "loss": 0.1835, "step": 149700 }, { "epoch": 1.47, "grad_norm": 4.051035404205322, "learning_rate": 2.5921724137931038e-06, "loss": 0.3061, "step": 149725 }, { "epoch": 1.47, "grad_norm": 3.2582526206970215, "learning_rate": 2.5917413793103452e-06, "loss": 0.1311, "step": 149750 }, { "epoch": 1.47, "grad_norm": 4.290843963623047, "learning_rate": 2.5913103448275862e-06, "loss": 0.2567, "step": 149775 }, { "epoch": 1.47, "grad_norm": 6.550058364868164, "learning_rate": 2.5908793103448277e-06, "loss": 0.1376, "step": 149800 }, { "epoch": 1.47, "grad_norm": 3.815490484237671, "learning_rate": 2.590448275862069e-06, "loss": 0.255, "step": 149825 }, { "epoch": 1.47, "grad_norm": 13.79286003112793, "learning_rate": 2.5900172413793106e-06, "loss": 0.1547, "step": 149850 }, { "epoch": 1.47, "grad_norm": 4.951208114624023, "learning_rate": 2.589586206896552e-06, "loss": 0.2501, "step": 149875 }, { "epoch": 1.47, "grad_norm": 12.434769630432129, "learning_rate": 2.589155172413793e-06, "loss": 0.1364, "step": 149900 }, { "epoch": 1.47, "grad_norm": 5.536574363708496, "learning_rate": 2.5887241379310346e-06, "loss": 0.2887, "step": 149925 }, { "epoch": 1.47, "grad_norm": 6.019182205200195, "learning_rate": 2.588293103448276e-06, "loss": 0.1101, "step": 149950 }, { "epoch": 1.48, "grad_norm": 5.093970775604248, "learning_rate": 2.5878620689655175e-06, "loss": 0.2877, "step": 149975 }, { "epoch": 1.48, "grad_norm": 6.011607646942139, "learning_rate": 2.5874310344827585e-06, "loss": 0.1781, "step": 150000 }, { "epoch": 1.48, "grad_norm": 3.716367721557617, "learning_rate": 2.587e-06, "loss": 0.3138, "step": 150025 }, { "epoch": 1.48, "grad_norm": 5.818077087402344, "learning_rate": 2.5865689655172414e-06, "loss": 0.167, "step": 150050 }, { "epoch": 1.48, "grad_norm": 5.1447882652282715, "learning_rate": 2.586137931034483e-06, "loss": 0.2728, "step": 150075 }, { "epoch": 1.48, "grad_norm": 13.096114158630371, "learning_rate": 2.5857068965517248e-06, "loss": 0.1512, "step": 150100 }, { "epoch": 1.48, "grad_norm": 6.349709510803223, "learning_rate": 2.5852758620689654e-06, "loss": 0.2764, "step": 150125 }, { "epoch": 1.48, "grad_norm": 7.748781681060791, "learning_rate": 2.584844827586207e-06, "loss": 0.152, "step": 150150 }, { "epoch": 1.48, "grad_norm": 4.555060863494873, "learning_rate": 2.5844137931034487e-06, "loss": 0.3077, "step": 150175 }, { "epoch": 1.48, "grad_norm": 7.7202887535095215, "learning_rate": 2.58398275862069e-06, "loss": 0.1775, "step": 150200 }, { "epoch": 1.48, "grad_norm": 4.848303318023682, "learning_rate": 2.583551724137931e-06, "loss": 0.2941, "step": 150225 }, { "epoch": 1.48, "grad_norm": 8.998858451843262, "learning_rate": 2.5831206896551726e-06, "loss": 0.144, "step": 150250 }, { "epoch": 1.48, "grad_norm": 3.5278468132019043, "learning_rate": 2.582689655172414e-06, "loss": 0.2951, "step": 150275 }, { "epoch": 1.48, "grad_norm": 8.494914054870605, "learning_rate": 2.5822586206896556e-06, "loss": 0.1156, "step": 150300 }, { "epoch": 1.48, "grad_norm": 3.0449106693267822, "learning_rate": 2.581827586206897e-06, "loss": 0.2737, "step": 150325 }, { "epoch": 1.48, "grad_norm": 5.658406734466553, "learning_rate": 2.581396551724138e-06, "loss": 0.1522, "step": 150350 }, { "epoch": 1.48, "grad_norm": 4.437618732452393, "learning_rate": 2.5809655172413795e-06, "loss": 0.2991, "step": 150375 }, { "epoch": 1.48, "grad_norm": 7.984445571899414, "learning_rate": 2.580534482758621e-06, "loss": 0.1143, "step": 150400 }, { "epoch": 1.48, "grad_norm": 3.168463706970215, "learning_rate": 2.5801034482758624e-06, "loss": 0.3099, "step": 150425 }, { "epoch": 1.48, "grad_norm": 9.19427490234375, "learning_rate": 2.5796724137931035e-06, "loss": 0.1786, "step": 150450 }, { "epoch": 1.48, "grad_norm": 3.7442333698272705, "learning_rate": 2.579241379310345e-06, "loss": 0.2983, "step": 150475 }, { "epoch": 1.48, "grad_norm": 6.864234924316406, "learning_rate": 2.5788103448275864e-06, "loss": 0.173, "step": 150500 }, { "epoch": 1.48, "grad_norm": 4.71879243850708, "learning_rate": 2.578379310344828e-06, "loss": 0.2746, "step": 150525 }, { "epoch": 1.48, "grad_norm": 7.823304176330566, "learning_rate": 2.5779482758620693e-06, "loss": 0.1287, "step": 150550 }, { "epoch": 1.48, "grad_norm": 4.5079569816589355, "learning_rate": 2.5775172413793103e-06, "loss": 0.2664, "step": 150575 }, { "epoch": 1.48, "grad_norm": 5.430033206939697, "learning_rate": 2.5770862068965518e-06, "loss": 0.1538, "step": 150600 }, { "epoch": 1.48, "grad_norm": 4.24838399887085, "learning_rate": 2.5766551724137932e-06, "loss": 0.2879, "step": 150625 }, { "epoch": 1.48, "grad_norm": 7.3100266456604, "learning_rate": 2.5762241379310347e-06, "loss": 0.112, "step": 150650 }, { "epoch": 1.48, "grad_norm": 3.7041454315185547, "learning_rate": 2.5757931034482757e-06, "loss": 0.3301, "step": 150675 }, { "epoch": 1.48, "grad_norm": 8.200089454650879, "learning_rate": 2.575362068965517e-06, "loss": 0.1301, "step": 150700 }, { "epoch": 1.48, "grad_norm": 4.3078508377075195, "learning_rate": 2.5749310344827586e-06, "loss": 0.2898, "step": 150725 }, { "epoch": 1.48, "grad_norm": 7.31600284576416, "learning_rate": 2.5745000000000005e-06, "loss": 0.1524, "step": 150750 }, { "epoch": 1.48, "grad_norm": 5.450228214263916, "learning_rate": 2.5740862068965524e-06, "loss": 0.3124, "step": 150775 }, { "epoch": 1.48, "grad_norm": 6.833547115325928, "learning_rate": 2.573655172413793e-06, "loss": 0.1528, "step": 150800 }, { "epoch": 1.48, "grad_norm": 4.503351211547852, "learning_rate": 2.5732241379310345e-06, "loss": 0.2753, "step": 150825 }, { "epoch": 1.48, "grad_norm": 3.33396053314209, "learning_rate": 2.5727931034482763e-06, "loss": 0.1373, "step": 150850 }, { "epoch": 1.48, "grad_norm": 4.343509674072266, "learning_rate": 2.572362068965518e-06, "loss": 0.304, "step": 150875 }, { "epoch": 1.48, "grad_norm": 8.615544319152832, "learning_rate": 2.5719310344827584e-06, "loss": 0.1612, "step": 150900 }, { "epoch": 1.48, "grad_norm": 4.80123233795166, "learning_rate": 2.5715000000000003e-06, "loss": 0.264, "step": 150925 }, { "epoch": 1.48, "grad_norm": 5.648296356201172, "learning_rate": 2.5710689655172417e-06, "loss": 0.1354, "step": 150950 }, { "epoch": 1.48, "grad_norm": 6.317795753479004, "learning_rate": 2.570637931034483e-06, "loss": 0.3747, "step": 150975 }, { "epoch": 1.49, "grad_norm": 8.762134552001953, "learning_rate": 2.5702068965517247e-06, "loss": 0.1803, "step": 151000 }, { "epoch": 1.49, "grad_norm": 4.294122219085693, "learning_rate": 2.5697758620689657e-06, "loss": 0.288, "step": 151025 }, { "epoch": 1.49, "grad_norm": 9.555912971496582, "learning_rate": 2.569344827586207e-06, "loss": 0.1132, "step": 151050 }, { "epoch": 1.49, "grad_norm": 3.79917311668396, "learning_rate": 2.5689137931034486e-06, "loss": 0.2651, "step": 151075 }, { "epoch": 1.49, "grad_norm": 6.333556652069092, "learning_rate": 2.56848275862069e-06, "loss": 0.1595, "step": 151100 }, { "epoch": 1.49, "grad_norm": 3.528879165649414, "learning_rate": 2.568051724137931e-06, "loss": 0.2992, "step": 151125 }, { "epoch": 1.49, "grad_norm": 9.93423080444336, "learning_rate": 2.5676206896551725e-06, "loss": 0.1724, "step": 151150 }, { "epoch": 1.49, "grad_norm": 4.605438232421875, "learning_rate": 2.567189655172414e-06, "loss": 0.3067, "step": 151175 }, { "epoch": 1.49, "grad_norm": 2.6530301570892334, "learning_rate": 2.5667586206896555e-06, "loss": 0.1352, "step": 151200 }, { "epoch": 1.49, "grad_norm": 4.29927921295166, "learning_rate": 2.566327586206897e-06, "loss": 0.2436, "step": 151225 }, { "epoch": 1.49, "grad_norm": 7.494663715362549, "learning_rate": 2.565896551724138e-06, "loss": 0.1574, "step": 151250 }, { "epoch": 1.49, "grad_norm": 6.01478385925293, "learning_rate": 2.5654655172413794e-06, "loss": 0.2951, "step": 151275 }, { "epoch": 1.49, "grad_norm": 5.563056468963623, "learning_rate": 2.565034482758621e-06, "loss": 0.1336, "step": 151300 }, { "epoch": 1.49, "grad_norm": 6.5338592529296875, "learning_rate": 2.5646034482758623e-06, "loss": 0.2342, "step": 151325 }, { "epoch": 1.49, "grad_norm": 4.522588729858398, "learning_rate": 2.5641724137931033e-06, "loss": 0.1775, "step": 151350 }, { "epoch": 1.49, "grad_norm": 4.664932727813721, "learning_rate": 2.563741379310345e-06, "loss": 0.259, "step": 151375 }, { "epoch": 1.49, "grad_norm": 5.686341762542725, "learning_rate": 2.5633103448275863e-06, "loss": 0.1206, "step": 151400 }, { "epoch": 1.49, "grad_norm": 4.60118293762207, "learning_rate": 2.562879310344828e-06, "loss": 0.3049, "step": 151425 }, { "epoch": 1.49, "grad_norm": 3.8298261165618896, "learning_rate": 2.5624482758620688e-06, "loss": 0.1303, "step": 151450 }, { "epoch": 1.49, "grad_norm": 4.232995510101318, "learning_rate": 2.56201724137931e-06, "loss": 0.2836, "step": 151475 }, { "epoch": 1.49, "grad_norm": 6.168298244476318, "learning_rate": 2.561586206896552e-06, "loss": 0.1681, "step": 151500 }, { "epoch": 1.49, "grad_norm": 4.6441874504089355, "learning_rate": 2.5611551724137935e-06, "loss": 0.2832, "step": 151525 }, { "epoch": 1.49, "grad_norm": 1.1200690269470215, "learning_rate": 2.560724137931035e-06, "loss": 0.1375, "step": 151550 }, { "epoch": 1.49, "grad_norm": 4.254009246826172, "learning_rate": 2.560293103448276e-06, "loss": 0.2762, "step": 151575 }, { "epoch": 1.49, "grad_norm": 12.61451530456543, "learning_rate": 2.5598620689655175e-06, "loss": 0.1458, "step": 151600 }, { "epoch": 1.49, "grad_norm": 4.068101406097412, "learning_rate": 2.559431034482759e-06, "loss": 0.3022, "step": 151625 }, { "epoch": 1.49, "grad_norm": 8.257975578308105, "learning_rate": 2.5590000000000004e-06, "loss": 0.1614, "step": 151650 }, { "epoch": 1.49, "grad_norm": 3.6540544033050537, "learning_rate": 2.5585689655172414e-06, "loss": 0.2722, "step": 151675 }, { "epoch": 1.49, "grad_norm": 12.588558197021484, "learning_rate": 2.558137931034483e-06, "loss": 0.1525, "step": 151700 }, { "epoch": 1.49, "grad_norm": 4.812437057495117, "learning_rate": 2.5577068965517243e-06, "loss": 0.2557, "step": 151725 }, { "epoch": 1.49, "grad_norm": 10.44137191772461, "learning_rate": 2.557275862068966e-06, "loss": 0.1648, "step": 151750 }, { "epoch": 1.49, "grad_norm": 4.887871742248535, "learning_rate": 2.5568448275862073e-06, "loss": 0.3128, "step": 151775 }, { "epoch": 1.49, "grad_norm": 12.735739707946777, "learning_rate": 2.5564137931034483e-06, "loss": 0.0958, "step": 151800 }, { "epoch": 1.49, "grad_norm": 3.6883394718170166, "learning_rate": 2.5559827586206898e-06, "loss": 0.2619, "step": 151825 }, { "epoch": 1.49, "grad_norm": 9.360686302185059, "learning_rate": 2.555551724137931e-06, "loss": 0.1661, "step": 151850 }, { "epoch": 1.49, "grad_norm": 4.793493747711182, "learning_rate": 2.5551206896551727e-06, "loss": 0.297, "step": 151875 }, { "epoch": 1.49, "grad_norm": 10.98340892791748, "learning_rate": 2.5546896551724137e-06, "loss": 0.1214, "step": 151900 }, { "epoch": 1.49, "grad_norm": 5.429880619049072, "learning_rate": 2.554258620689655e-06, "loss": 0.2702, "step": 151925 }, { "epoch": 1.49, "grad_norm": 10.258033752441406, "learning_rate": 2.5538275862068966e-06, "loss": 0.1288, "step": 151950 }, { "epoch": 1.49, "grad_norm": 4.466560363769531, "learning_rate": 2.553396551724138e-06, "loss": 0.2279, "step": 151975 }, { "epoch": 1.49, "grad_norm": 10.318778991699219, "learning_rate": 2.55296551724138e-06, "loss": 0.1393, "step": 152000 }, { "epoch": 1.5, "grad_norm": 5.66601037979126, "learning_rate": 2.5525344827586206e-06, "loss": 0.2919, "step": 152025 }, { "epoch": 1.5, "grad_norm": 8.22294807434082, "learning_rate": 2.552103448275862e-06, "loss": 0.1481, "step": 152050 }, { "epoch": 1.5, "grad_norm": 5.486703395843506, "learning_rate": 2.551672413793104e-06, "loss": 0.2814, "step": 152075 }, { "epoch": 1.5, "grad_norm": 12.919286727905273, "learning_rate": 2.5512413793103453e-06, "loss": 0.1407, "step": 152100 }, { "epoch": 1.5, "grad_norm": 4.797436237335205, "learning_rate": 2.550810344827586e-06, "loss": 0.2684, "step": 152125 }, { "epoch": 1.5, "grad_norm": 5.912746429443359, "learning_rate": 2.550379310344828e-06, "loss": 0.1185, "step": 152150 }, { "epoch": 1.5, "grad_norm": 4.462306976318359, "learning_rate": 2.5499482758620693e-06, "loss": 0.2736, "step": 152175 }, { "epoch": 1.5, "grad_norm": 12.919903755187988, "learning_rate": 2.5495172413793107e-06, "loss": 0.1606, "step": 152200 }, { "epoch": 1.5, "grad_norm": 3.90950345993042, "learning_rate": 2.549086206896552e-06, "loss": 0.3218, "step": 152225 }, { "epoch": 1.5, "grad_norm": 4.983852863311768, "learning_rate": 2.5486551724137932e-06, "loss": 0.1246, "step": 152250 }, { "epoch": 1.5, "grad_norm": 4.0756611824035645, "learning_rate": 2.5482241379310347e-06, "loss": 0.2984, "step": 152275 }, { "epoch": 1.5, "grad_norm": 14.198098182678223, "learning_rate": 2.547793103448276e-06, "loss": 0.1444, "step": 152300 }, { "epoch": 1.5, "grad_norm": 5.9821906089782715, "learning_rate": 2.5473620689655176e-06, "loss": 0.2701, "step": 152325 }, { "epoch": 1.5, "grad_norm": 2.6629750728607178, "learning_rate": 2.5469310344827586e-06, "loss": 0.1203, "step": 152350 }, { "epoch": 1.5, "grad_norm": 5.292853832244873, "learning_rate": 2.5465e-06, "loss": 0.2733, "step": 152375 }, { "epoch": 1.5, "grad_norm": 7.062783718109131, "learning_rate": 2.5460689655172416e-06, "loss": 0.1402, "step": 152400 }, { "epoch": 1.5, "grad_norm": 3.8041250705718994, "learning_rate": 2.545637931034483e-06, "loss": 0.2765, "step": 152425 }, { "epoch": 1.5, "grad_norm": 8.182955741882324, "learning_rate": 2.5452068965517245e-06, "loss": 0.144, "step": 152450 }, { "epoch": 1.5, "grad_norm": 3.922700881958008, "learning_rate": 2.5447758620689655e-06, "loss": 0.294, "step": 152475 }, { "epoch": 1.5, "grad_norm": 9.360034942626953, "learning_rate": 2.544344827586207e-06, "loss": 0.1825, "step": 152500 }, { "epoch": 1.5, "grad_norm": 3.253108024597168, "learning_rate": 2.5439137931034484e-06, "loss": 0.2475, "step": 152525 }, { "epoch": 1.5, "grad_norm": 10.78539752960205, "learning_rate": 2.54348275862069e-06, "loss": 0.1304, "step": 152550 }, { "epoch": 1.5, "grad_norm": 4.121627330780029, "learning_rate": 2.543051724137931e-06, "loss": 0.2939, "step": 152575 }, { "epoch": 1.5, "grad_norm": 2.7987170219421387, "learning_rate": 2.5426206896551724e-06, "loss": 0.1495, "step": 152600 }, { "epoch": 1.5, "grad_norm": 4.451186180114746, "learning_rate": 2.542189655172414e-06, "loss": 0.3055, "step": 152625 }, { "epoch": 1.5, "grad_norm": 7.76789665222168, "learning_rate": 2.5417586206896557e-06, "loss": 0.1205, "step": 152650 }, { "epoch": 1.5, "grad_norm": 5.4057230949401855, "learning_rate": 2.541327586206897e-06, "loss": 0.2905, "step": 152675 }, { "epoch": 1.5, "grad_norm": 15.399250984191895, "learning_rate": 2.5408965517241378e-06, "loss": 0.2001, "step": 152700 }, { "epoch": 1.5, "grad_norm": 5.892416000366211, "learning_rate": 2.5404655172413796e-06, "loss": 0.3147, "step": 152725 }, { "epoch": 1.5, "grad_norm": 8.150996208190918, "learning_rate": 2.540034482758621e-06, "loss": 0.1237, "step": 152750 }, { "epoch": 1.5, "grad_norm": 4.689450740814209, "learning_rate": 2.5396034482758626e-06, "loss": 0.2689, "step": 152775 }, { "epoch": 1.5, "grad_norm": 7.584892749786377, "learning_rate": 2.5391724137931036e-06, "loss": 0.1253, "step": 152800 }, { "epoch": 1.5, "grad_norm": 2.784268856048584, "learning_rate": 2.538741379310345e-06, "loss": 0.2632, "step": 152825 }, { "epoch": 1.5, "grad_norm": 5.137351989746094, "learning_rate": 2.5383103448275865e-06, "loss": 0.1304, "step": 152850 }, { "epoch": 1.5, "grad_norm": 4.595893859863281, "learning_rate": 2.537879310344828e-06, "loss": 0.3131, "step": 152875 }, { "epoch": 1.5, "grad_norm": 7.649237632751465, "learning_rate": 2.5374482758620694e-06, "loss": 0.1782, "step": 152900 }, { "epoch": 1.5, "grad_norm": 4.912304878234863, "learning_rate": 2.5370172413793104e-06, "loss": 0.2544, "step": 152925 }, { "epoch": 1.5, "grad_norm": 15.824875831604004, "learning_rate": 2.536586206896552e-06, "loss": 0.1897, "step": 152950 }, { "epoch": 1.5, "grad_norm": 5.589894771575928, "learning_rate": 2.5361551724137934e-06, "loss": 0.2886, "step": 152975 }, { "epoch": 1.5, "grad_norm": 7.620635032653809, "learning_rate": 2.535724137931035e-06, "loss": 0.1292, "step": 153000 }, { "epoch": 1.51, "grad_norm": 5.015040874481201, "learning_rate": 2.5353103448275863e-06, "loss": 0.2697, "step": 153025 }, { "epoch": 1.51, "grad_norm": 4.801722526550293, "learning_rate": 2.5348793103448277e-06, "loss": 0.1389, "step": 153050 }, { "epoch": 1.51, "grad_norm": 4.274822235107422, "learning_rate": 2.534448275862069e-06, "loss": 0.2453, "step": 153075 }, { "epoch": 1.51, "grad_norm": 10.922648429870605, "learning_rate": 2.5340172413793106e-06, "loss": 0.1155, "step": 153100 }, { "epoch": 1.51, "grad_norm": 14.113821983337402, "learning_rate": 2.5335862068965517e-06, "loss": 0.3147, "step": 153125 }, { "epoch": 1.51, "grad_norm": 6.960973262786865, "learning_rate": 2.533155172413793e-06, "loss": 0.1384, "step": 153150 }, { "epoch": 1.51, "grad_norm": 2.964401960372925, "learning_rate": 2.5327241379310346e-06, "loss": 0.2926, "step": 153175 }, { "epoch": 1.51, "grad_norm": 9.725415229797363, "learning_rate": 2.532293103448276e-06, "loss": 0.1692, "step": 153200 }, { "epoch": 1.51, "grad_norm": 4.939337253570557, "learning_rate": 2.5318620689655175e-06, "loss": 0.242, "step": 153225 }, { "epoch": 1.51, "grad_norm": 5.26843786239624, "learning_rate": 2.5314310344827585e-06, "loss": 0.1664, "step": 153250 }, { "epoch": 1.51, "grad_norm": 5.703664779663086, "learning_rate": 2.531e-06, "loss": 0.251, "step": 153275 }, { "epoch": 1.51, "grad_norm": 8.328317642211914, "learning_rate": 2.5305689655172414e-06, "loss": 0.1383, "step": 153300 }, { "epoch": 1.51, "grad_norm": 4.547614574432373, "learning_rate": 2.5301379310344833e-06, "loss": 0.2166, "step": 153325 }, { "epoch": 1.51, "grad_norm": 10.490044593811035, "learning_rate": 2.529706896551724e-06, "loss": 0.1279, "step": 153350 }, { "epoch": 1.51, "grad_norm": 4.740951061248779, "learning_rate": 2.5292758620689654e-06, "loss": 0.2658, "step": 153375 }, { "epoch": 1.51, "grad_norm": 10.09807300567627, "learning_rate": 2.5288448275862073e-06, "loss": 0.1244, "step": 153400 }, { "epoch": 1.51, "grad_norm": 4.293422222137451, "learning_rate": 2.5284137931034487e-06, "loss": 0.247, "step": 153425 }, { "epoch": 1.51, "grad_norm": 13.655866622924805, "learning_rate": 2.52798275862069e-06, "loss": 0.1513, "step": 153450 }, { "epoch": 1.51, "grad_norm": 3.8494789600372314, "learning_rate": 2.5275517241379312e-06, "loss": 0.3038, "step": 153475 }, { "epoch": 1.51, "grad_norm": 9.155221939086914, "learning_rate": 2.5271206896551727e-06, "loss": 0.1223, "step": 153500 }, { "epoch": 1.51, "grad_norm": 5.383869171142578, "learning_rate": 2.526689655172414e-06, "loss": 0.3214, "step": 153525 }, { "epoch": 1.51, "grad_norm": 9.277100563049316, "learning_rate": 2.5262586206896556e-06, "loss": 0.1429, "step": 153550 }, { "epoch": 1.51, "grad_norm": 4.790120601654053, "learning_rate": 2.5258275862068966e-06, "loss": 0.2565, "step": 153575 }, { "epoch": 1.51, "grad_norm": 3.893601655960083, "learning_rate": 2.525396551724138e-06, "loss": 0.1179, "step": 153600 }, { "epoch": 1.51, "grad_norm": 3.8294363021850586, "learning_rate": 2.5249655172413795e-06, "loss": 0.2881, "step": 153625 }, { "epoch": 1.51, "grad_norm": 3.8015620708465576, "learning_rate": 2.524534482758621e-06, "loss": 0.1683, "step": 153650 }, { "epoch": 1.51, "grad_norm": 6.068688869476318, "learning_rate": 2.5241034482758624e-06, "loss": 0.2803, "step": 153675 }, { "epoch": 1.51, "grad_norm": 13.691370964050293, "learning_rate": 2.5236724137931035e-06, "loss": 0.1612, "step": 153700 }, { "epoch": 1.51, "grad_norm": 9.521845817565918, "learning_rate": 2.523241379310345e-06, "loss": 0.2944, "step": 153725 }, { "epoch": 1.51, "grad_norm": 7.584008693695068, "learning_rate": 2.5228103448275864e-06, "loss": 0.1088, "step": 153750 }, { "epoch": 1.51, "grad_norm": 14.06822395324707, "learning_rate": 2.522379310344828e-06, "loss": 0.3364, "step": 153775 }, { "epoch": 1.51, "grad_norm": 5.929850101470947, "learning_rate": 2.521948275862069e-06, "loss": 0.1705, "step": 153800 }, { "epoch": 1.51, "grad_norm": 4.46290397644043, "learning_rate": 2.5215172413793103e-06, "loss": 0.2675, "step": 153825 }, { "epoch": 1.51, "grad_norm": 8.279305458068848, "learning_rate": 2.521086206896552e-06, "loss": 0.1523, "step": 153850 }, { "epoch": 1.51, "grad_norm": 4.414818286895752, "learning_rate": 2.5206551724137933e-06, "loss": 0.3033, "step": 153875 }, { "epoch": 1.51, "grad_norm": 9.483975410461426, "learning_rate": 2.520224137931035e-06, "loss": 0.1324, "step": 153900 }, { "epoch": 1.51, "grad_norm": 4.598659992218018, "learning_rate": 2.5197931034482757e-06, "loss": 0.3227, "step": 153925 }, { "epoch": 1.51, "grad_norm": 14.335415840148926, "learning_rate": 2.519362068965517e-06, "loss": 0.1405, "step": 153950 }, { "epoch": 1.51, "grad_norm": 3.6178030967712402, "learning_rate": 2.518931034482759e-06, "loss": 0.2817, "step": 153975 }, { "epoch": 1.51, "grad_norm": 11.551432609558105, "learning_rate": 2.5185000000000005e-06, "loss": 0.1189, "step": 154000 }, { "epoch": 1.51, "grad_norm": 4.404358386993408, "learning_rate": 2.518068965517241e-06, "loss": 0.2595, "step": 154025 }, { "epoch": 1.52, "grad_norm": 8.769397735595703, "learning_rate": 2.517637931034483e-06, "loss": 0.1353, "step": 154050 }, { "epoch": 1.52, "grad_norm": 4.7564496994018555, "learning_rate": 2.5172068965517245e-06, "loss": 0.2914, "step": 154075 }, { "epoch": 1.52, "grad_norm": 8.677517890930176, "learning_rate": 2.516775862068966e-06, "loss": 0.1381, "step": 154100 }, { "epoch": 1.52, "grad_norm": 2.9164493083953857, "learning_rate": 2.5163448275862074e-06, "loss": 0.256, "step": 154125 }, { "epoch": 1.52, "grad_norm": 4.620041847229004, "learning_rate": 2.5159137931034484e-06, "loss": 0.154, "step": 154150 }, { "epoch": 1.52, "grad_norm": 4.601070880889893, "learning_rate": 2.51548275862069e-06, "loss": 0.2871, "step": 154175 }, { "epoch": 1.52, "grad_norm": 8.23061752319336, "learning_rate": 2.5150517241379313e-06, "loss": 0.1357, "step": 154200 }, { "epoch": 1.52, "grad_norm": 5.885787487030029, "learning_rate": 2.514620689655173e-06, "loss": 0.2785, "step": 154225 }, { "epoch": 1.52, "grad_norm": 8.118291854858398, "learning_rate": 2.514189655172414e-06, "loss": 0.1298, "step": 154250 }, { "epoch": 1.52, "grad_norm": 4.830206871032715, "learning_rate": 2.5137586206896553e-06, "loss": 0.2956, "step": 154275 }, { "epoch": 1.52, "grad_norm": 11.458975791931152, "learning_rate": 2.5133275862068967e-06, "loss": 0.1453, "step": 154300 }, { "epoch": 1.52, "grad_norm": 6.2439045906066895, "learning_rate": 2.512896551724138e-06, "loss": 0.2808, "step": 154325 }, { "epoch": 1.52, "grad_norm": 9.327507019042969, "learning_rate": 2.5124655172413797e-06, "loss": 0.1832, "step": 154350 }, { "epoch": 1.52, "grad_norm": 3.4012911319732666, "learning_rate": 2.5120344827586207e-06, "loss": 0.2362, "step": 154375 }, { "epoch": 1.52, "grad_norm": 5.976316928863525, "learning_rate": 2.511603448275862e-06, "loss": 0.1168, "step": 154400 }, { "epoch": 1.52, "grad_norm": 3.18222975730896, "learning_rate": 2.5111724137931036e-06, "loss": 0.2795, "step": 154425 }, { "epoch": 1.52, "grad_norm": 12.301335334777832, "learning_rate": 2.510741379310345e-06, "loss": 0.1599, "step": 154450 }, { "epoch": 1.52, "grad_norm": 3.0916543006896973, "learning_rate": 2.510310344827586e-06, "loss": 0.2904, "step": 154475 }, { "epoch": 1.52, "grad_norm": 10.973298072814941, "learning_rate": 2.5098793103448275e-06, "loss": 0.1368, "step": 154500 }, { "epoch": 1.52, "grad_norm": 5.548436641693115, "learning_rate": 2.509448275862069e-06, "loss": 0.3185, "step": 154525 }, { "epoch": 1.52, "grad_norm": 6.875497341156006, "learning_rate": 2.509017241379311e-06, "loss": 0.1844, "step": 154550 }, { "epoch": 1.52, "grad_norm": 5.7802324295043945, "learning_rate": 2.5085862068965523e-06, "loss": 0.3301, "step": 154575 }, { "epoch": 1.52, "grad_norm": 5.222450256347656, "learning_rate": 2.508155172413793e-06, "loss": 0.1344, "step": 154600 }, { "epoch": 1.52, "grad_norm": 4.349041938781738, "learning_rate": 2.507724137931035e-06, "loss": 0.2514, "step": 154625 }, { "epoch": 1.52, "grad_norm": 5.415904521942139, "learning_rate": 2.5072931034482763e-06, "loss": 0.1619, "step": 154650 }, { "epoch": 1.52, "grad_norm": 3.9846408367156982, "learning_rate": 2.5068620689655177e-06, "loss": 0.2594, "step": 154675 }, { "epoch": 1.52, "grad_norm": 6.849225997924805, "learning_rate": 2.5064310344827588e-06, "loss": 0.156, "step": 154700 }, { "epoch": 1.52, "grad_norm": 5.397093772888184, "learning_rate": 2.5060000000000002e-06, "loss": 0.3123, "step": 154725 }, { "epoch": 1.52, "grad_norm": 10.69874095916748, "learning_rate": 2.5055689655172417e-06, "loss": 0.1645, "step": 154750 }, { "epoch": 1.52, "grad_norm": 5.4454193115234375, "learning_rate": 2.505137931034483e-06, "loss": 0.233, "step": 154775 }, { "epoch": 1.52, "grad_norm": 10.171777725219727, "learning_rate": 2.5047068965517246e-06, "loss": 0.1246, "step": 154800 }, { "epoch": 1.52, "grad_norm": 3.69307017326355, "learning_rate": 2.5042758620689656e-06, "loss": 0.2485, "step": 154825 }, { "epoch": 1.52, "grad_norm": 11.036515235900879, "learning_rate": 2.503844827586207e-06, "loss": 0.1219, "step": 154850 }, { "epoch": 1.52, "grad_norm": 6.266992092132568, "learning_rate": 2.5034137931034485e-06, "loss": 0.3393, "step": 154875 }, { "epoch": 1.52, "grad_norm": 7.614490509033203, "learning_rate": 2.50298275862069e-06, "loss": 0.1741, "step": 154900 }, { "epoch": 1.52, "grad_norm": 3.9661507606506348, "learning_rate": 2.502551724137931e-06, "loss": 0.266, "step": 154925 }, { "epoch": 1.52, "grad_norm": 12.509282112121582, "learning_rate": 2.502137931034483e-06, "loss": 0.1536, "step": 154950 }, { "epoch": 1.52, "grad_norm": 5.158587455749512, "learning_rate": 2.5017068965517244e-06, "loss": 0.2572, "step": 154975 }, { "epoch": 1.52, "grad_norm": 7.659351825714111, "learning_rate": 2.5012931034482763e-06, "loss": 0.157, "step": 155000 }, { "epoch": 1.52, "grad_norm": 3.9085028171539307, "learning_rate": 2.5008620689655173e-06, "loss": 0.2912, "step": 155025 }, { "epoch": 1.52, "grad_norm": 33.56289291381836, "learning_rate": 2.5004310344827587e-06, "loss": 0.1352, "step": 155050 }, { "epoch": 1.53, "grad_norm": 3.879606008529663, "learning_rate": 2.5e-06, "loss": 0.2763, "step": 155075 }, { "epoch": 1.53, "grad_norm": 5.978625774383545, "learning_rate": 2.4995689655172417e-06, "loss": 0.1311, "step": 155100 }, { "epoch": 1.53, "grad_norm": 5.852541446685791, "learning_rate": 2.499137931034483e-06, "loss": 0.2728, "step": 155125 }, { "epoch": 1.53, "grad_norm": 0.5656980872154236, "learning_rate": 2.4987068965517246e-06, "loss": 0.1576, "step": 155150 }, { "epoch": 1.53, "grad_norm": 3.8566436767578125, "learning_rate": 2.4982758620689656e-06, "loss": 0.308, "step": 155175 }, { "epoch": 1.53, "grad_norm": 6.1829915046691895, "learning_rate": 2.497844827586207e-06, "loss": 0.1454, "step": 155200 }, { "epoch": 1.53, "grad_norm": 7.758937358856201, "learning_rate": 2.4974137931034485e-06, "loss": 0.3147, "step": 155225 }, { "epoch": 1.53, "grad_norm": 4.733954429626465, "learning_rate": 2.49698275862069e-06, "loss": 0.1239, "step": 155250 }, { "epoch": 1.53, "grad_norm": 4.991767406463623, "learning_rate": 2.496551724137931e-06, "loss": 0.2527, "step": 155275 }, { "epoch": 1.53, "grad_norm": 15.175393104553223, "learning_rate": 2.4961206896551725e-06, "loss": 0.1399, "step": 155300 }, { "epoch": 1.53, "grad_norm": 4.489403247833252, "learning_rate": 2.495689655172414e-06, "loss": 0.2677, "step": 155325 }, { "epoch": 1.53, "grad_norm": 11.16202163696289, "learning_rate": 2.4952586206896554e-06, "loss": 0.1675, "step": 155350 }, { "epoch": 1.53, "grad_norm": 3.3412559032440186, "learning_rate": 2.494827586206897e-06, "loss": 0.2856, "step": 155375 }, { "epoch": 1.53, "grad_norm": 9.68717098236084, "learning_rate": 2.4943965517241383e-06, "loss": 0.156, "step": 155400 }, { "epoch": 1.53, "grad_norm": 4.694990634918213, "learning_rate": 2.4939655172413797e-06, "loss": 0.2807, "step": 155425 }, { "epoch": 1.53, "grad_norm": 8.403200149536133, "learning_rate": 2.4935344827586208e-06, "loss": 0.1547, "step": 155450 }, { "epoch": 1.53, "grad_norm": 5.959311008453369, "learning_rate": 2.4931034482758622e-06, "loss": 0.3102, "step": 155475 }, { "epoch": 1.53, "grad_norm": 6.883479118347168, "learning_rate": 2.4926724137931037e-06, "loss": 0.1238, "step": 155500 }, { "epoch": 1.53, "grad_norm": 4.925252437591553, "learning_rate": 2.492241379310345e-06, "loss": 0.3, "step": 155525 }, { "epoch": 1.53, "grad_norm": 10.257046699523926, "learning_rate": 2.4918103448275866e-06, "loss": 0.1326, "step": 155550 }, { "epoch": 1.53, "grad_norm": 4.420131206512451, "learning_rate": 2.4913793103448276e-06, "loss": 0.3864, "step": 155575 }, { "epoch": 1.53, "grad_norm": 7.27385139465332, "learning_rate": 2.490948275862069e-06, "loss": 0.1585, "step": 155600 }, { "epoch": 1.53, "grad_norm": 3.8787124156951904, "learning_rate": 2.4905172413793106e-06, "loss": 0.2979, "step": 155625 }, { "epoch": 1.53, "grad_norm": 7.599272727966309, "learning_rate": 2.490086206896552e-06, "loss": 0.1526, "step": 155650 }, { "epoch": 1.53, "grad_norm": 6.373057842254639, "learning_rate": 2.489655172413793e-06, "loss": 0.3264, "step": 155675 }, { "epoch": 1.53, "grad_norm": 12.006546974182129, "learning_rate": 2.489224137931035e-06, "loss": 0.1443, "step": 155700 }, { "epoch": 1.53, "grad_norm": 5.0811052322387695, "learning_rate": 2.488793103448276e-06, "loss": 0.2881, "step": 155725 }, { "epoch": 1.53, "grad_norm": 11.258222579956055, "learning_rate": 2.4883620689655174e-06, "loss": 0.1206, "step": 155750 }, { "epoch": 1.53, "grad_norm": 5.78834342956543, "learning_rate": 2.487931034482759e-06, "loss": 0.3112, "step": 155775 }, { "epoch": 1.53, "grad_norm": 6.98980712890625, "learning_rate": 2.4875000000000003e-06, "loss": 0.1236, "step": 155800 }, { "epoch": 1.53, "grad_norm": 5.058308124542236, "learning_rate": 2.4870689655172418e-06, "loss": 0.2959, "step": 155825 }, { "epoch": 1.53, "grad_norm": 11.990513801574707, "learning_rate": 2.486637931034483e-06, "loss": 0.1424, "step": 155850 }, { "epoch": 1.53, "grad_norm": 7.030582427978516, "learning_rate": 2.4862068965517243e-06, "loss": 0.266, "step": 155875 }, { "epoch": 1.53, "grad_norm": 6.642551422119141, "learning_rate": 2.4857758620689657e-06, "loss": 0.1399, "step": 155900 }, { "epoch": 1.53, "grad_norm": 6.2497076988220215, "learning_rate": 2.485344827586207e-06, "loss": 0.2918, "step": 155925 }, { "epoch": 1.53, "grad_norm": 5.777909755706787, "learning_rate": 2.4849137931034482e-06, "loss": 0.1099, "step": 155950 }, { "epoch": 1.53, "grad_norm": 6.104795455932617, "learning_rate": 2.4844827586206897e-06, "loss": 0.2635, "step": 155975 }, { "epoch": 1.53, "grad_norm": 5.550318241119385, "learning_rate": 2.484051724137931e-06, "loss": 0.1517, "step": 156000 }, { "epoch": 1.53, "grad_norm": 3.894523859024048, "learning_rate": 2.4836206896551726e-06, "loss": 0.3053, "step": 156025 }, { "epoch": 1.53, "grad_norm": 11.153873443603516, "learning_rate": 2.483189655172414e-06, "loss": 0.1388, "step": 156050 }, { "epoch": 1.54, "grad_norm": 6.143197059631348, "learning_rate": 2.4827586206896555e-06, "loss": 0.2422, "step": 156075 }, { "epoch": 1.54, "grad_norm": 11.334550857543945, "learning_rate": 2.482327586206897e-06, "loss": 0.1704, "step": 156100 }, { "epoch": 1.54, "grad_norm": 5.646172046661377, "learning_rate": 2.481896551724138e-06, "loss": 0.2978, "step": 156125 }, { "epoch": 1.54, "grad_norm": 7.966084957122803, "learning_rate": 2.4814655172413794e-06, "loss": 0.1571, "step": 156150 }, { "epoch": 1.54, "grad_norm": 5.331446170806885, "learning_rate": 2.481034482758621e-06, "loss": 0.2661, "step": 156175 }, { "epoch": 1.54, "grad_norm": 8.433122634887695, "learning_rate": 2.4806034482758624e-06, "loss": 0.1299, "step": 156200 }, { "epoch": 1.54, "grad_norm": 5.735720157623291, "learning_rate": 2.4801724137931034e-06, "loss": 0.2338, "step": 156225 }, { "epoch": 1.54, "grad_norm": 2.7926013469696045, "learning_rate": 2.479741379310345e-06, "loss": 0.1128, "step": 156250 }, { "epoch": 1.54, "grad_norm": 3.823864698410034, "learning_rate": 2.4793103448275867e-06, "loss": 0.2858, "step": 156275 }, { "epoch": 1.54, "grad_norm": 12.226861000061035, "learning_rate": 2.4788793103448278e-06, "loss": 0.1784, "step": 156300 }, { "epoch": 1.54, "grad_norm": 4.169139862060547, "learning_rate": 2.4784482758620692e-06, "loss": 0.3093, "step": 156325 }, { "epoch": 1.54, "grad_norm": 6.678481101989746, "learning_rate": 2.4780172413793107e-06, "loss": 0.1415, "step": 156350 }, { "epoch": 1.54, "grad_norm": 4.263181686401367, "learning_rate": 2.477586206896552e-06, "loss": 0.3003, "step": 156375 }, { "epoch": 1.54, "grad_norm": 12.365376472473145, "learning_rate": 2.477155172413793e-06, "loss": 0.1377, "step": 156400 }, { "epoch": 1.54, "grad_norm": 4.767279624938965, "learning_rate": 2.4767241379310346e-06, "loss": 0.2605, "step": 156425 }, { "epoch": 1.54, "grad_norm": 11.315757751464844, "learning_rate": 2.476293103448276e-06, "loss": 0.1421, "step": 156450 }, { "epoch": 1.54, "grad_norm": 4.144498348236084, "learning_rate": 2.4758620689655175e-06, "loss": 0.2831, "step": 156475 }, { "epoch": 1.54, "grad_norm": 6.516293525695801, "learning_rate": 2.475431034482759e-06, "loss": 0.1216, "step": 156500 }, { "epoch": 1.54, "grad_norm": 5.8859992027282715, "learning_rate": 2.475e-06, "loss": 0.2423, "step": 156525 }, { "epoch": 1.54, "grad_norm": 6.400689125061035, "learning_rate": 2.4745689655172415e-06, "loss": 0.1336, "step": 156550 }, { "epoch": 1.54, "grad_norm": 3.9977223873138428, "learning_rate": 2.474137931034483e-06, "loss": 0.3461, "step": 156575 }, { "epoch": 1.54, "grad_norm": 7.627560138702393, "learning_rate": 2.4737068965517244e-06, "loss": 0.1461, "step": 156600 }, { "epoch": 1.54, "grad_norm": 6.346458435058594, "learning_rate": 2.473275862068966e-06, "loss": 0.2686, "step": 156625 }, { "epoch": 1.54, "grad_norm": 11.622405052185059, "learning_rate": 2.4728448275862073e-06, "loss": 0.1794, "step": 156650 }, { "epoch": 1.54, "grad_norm": 6.721864223480225, "learning_rate": 2.4724137931034483e-06, "loss": 0.3076, "step": 156675 }, { "epoch": 1.54, "grad_norm": 14.911394119262695, "learning_rate": 2.47198275862069e-06, "loss": 0.1438, "step": 156700 }, { "epoch": 1.54, "grad_norm": 9.805874824523926, "learning_rate": 2.4715517241379312e-06, "loss": 0.2449, "step": 156725 }, { "epoch": 1.54, "grad_norm": 10.267083168029785, "learning_rate": 2.4711206896551727e-06, "loss": 0.156, "step": 156750 }, { "epoch": 1.54, "grad_norm": 8.007678031921387, "learning_rate": 2.470689655172414e-06, "loss": 0.2733, "step": 156775 }, { "epoch": 1.54, "grad_norm": 8.327775001525879, "learning_rate": 2.470258620689655e-06, "loss": 0.1501, "step": 156800 }, { "epoch": 1.54, "grad_norm": 2.4993438720703125, "learning_rate": 2.4698275862068967e-06, "loss": 0.3103, "step": 156825 }, { "epoch": 1.54, "grad_norm": 6.246931076049805, "learning_rate": 2.469396551724138e-06, "loss": 0.1117, "step": 156850 }, { "epoch": 1.54, "grad_norm": 3.743746042251587, "learning_rate": 2.4689655172413796e-06, "loss": 0.2435, "step": 156875 }, { "epoch": 1.54, "grad_norm": 2.8818111419677734, "learning_rate": 2.4685344827586206e-06, "loss": 0.1478, "step": 156900 }, { "epoch": 1.54, "grad_norm": 6.005093097686768, "learning_rate": 2.4681034482758625e-06, "loss": 0.2892, "step": 156925 }, { "epoch": 1.54, "grad_norm": 11.749736785888672, "learning_rate": 2.4676724137931035e-06, "loss": 0.1364, "step": 156950 }, { "epoch": 1.54, "grad_norm": 4.394487380981445, "learning_rate": 2.467241379310345e-06, "loss": 0.2991, "step": 156975 }, { "epoch": 1.54, "grad_norm": 6.554175853729248, "learning_rate": 2.4668103448275864e-06, "loss": 0.1521, "step": 157000 }, { "epoch": 1.54, "grad_norm": 3.9761781692504883, "learning_rate": 2.466379310344828e-06, "loss": 0.2606, "step": 157025 }, { "epoch": 1.54, "grad_norm": 11.292840003967285, "learning_rate": 2.4659482758620693e-06, "loss": 0.1711, "step": 157050 }, { "epoch": 1.54, "grad_norm": 6.4091362953186035, "learning_rate": 2.4655172413793104e-06, "loss": 0.2826, "step": 157075 }, { "epoch": 1.55, "grad_norm": 11.427698135375977, "learning_rate": 2.465086206896552e-06, "loss": 0.2163, "step": 157100 }, { "epoch": 1.55, "grad_norm": 5.124078750610352, "learning_rate": 2.4646551724137933e-06, "loss": 0.2983, "step": 157125 }, { "epoch": 1.55, "grad_norm": 8.964085578918457, "learning_rate": 2.4642241379310347e-06, "loss": 0.1282, "step": 157150 }, { "epoch": 1.55, "grad_norm": 4.425196647644043, "learning_rate": 2.4637931034482758e-06, "loss": 0.279, "step": 157175 }, { "epoch": 1.55, "grad_norm": 12.82335090637207, "learning_rate": 2.4633620689655177e-06, "loss": 0.1572, "step": 157200 }, { "epoch": 1.55, "grad_norm": 3.221872329711914, "learning_rate": 2.462931034482759e-06, "loss": 0.3039, "step": 157225 }, { "epoch": 1.55, "grad_norm": 8.286521911621094, "learning_rate": 2.4625e-06, "loss": 0.1466, "step": 157250 }, { "epoch": 1.55, "grad_norm": 4.859644412994385, "learning_rate": 2.4620689655172416e-06, "loss": 0.3183, "step": 157275 }, { "epoch": 1.55, "grad_norm": 9.100564002990723, "learning_rate": 2.461637931034483e-06, "loss": 0.14, "step": 157300 }, { "epoch": 1.55, "grad_norm": 3.4530975818634033, "learning_rate": 2.4612068965517245e-06, "loss": 0.2555, "step": 157325 }, { "epoch": 1.55, "grad_norm": 9.096434593200684, "learning_rate": 2.4607758620689655e-06, "loss": 0.1595, "step": 157350 }, { "epoch": 1.55, "grad_norm": 3.3392436504364014, "learning_rate": 2.460344827586207e-06, "loss": 0.2994, "step": 157375 }, { "epoch": 1.55, "grad_norm": 7.364614486694336, "learning_rate": 2.4599137931034485e-06, "loss": 0.1492, "step": 157400 }, { "epoch": 1.55, "grad_norm": 3.196159601211548, "learning_rate": 2.45948275862069e-06, "loss": 0.2265, "step": 157425 }, { "epoch": 1.55, "grad_norm": 8.258193016052246, "learning_rate": 2.4590517241379314e-06, "loss": 0.1527, "step": 157450 }, { "epoch": 1.55, "grad_norm": 3.568244218826294, "learning_rate": 2.4586206896551724e-06, "loss": 0.2558, "step": 157475 }, { "epoch": 1.55, "grad_norm": 7.965785980224609, "learning_rate": 2.4581896551724143e-06, "loss": 0.1928, "step": 157500 }, { "epoch": 1.55, "grad_norm": 4.241116523742676, "learning_rate": 2.4577586206896553e-06, "loss": 0.2298, "step": 157525 }, { "epoch": 1.55, "grad_norm": 6.188481330871582, "learning_rate": 2.4573275862068968e-06, "loss": 0.1703, "step": 157550 }, { "epoch": 1.55, "grad_norm": 3.779442548751831, "learning_rate": 2.4568965517241382e-06, "loss": 0.2472, "step": 157575 }, { "epoch": 1.55, "grad_norm": 8.365863800048828, "learning_rate": 2.4564655172413797e-06, "loss": 0.1285, "step": 157600 }, { "epoch": 1.55, "grad_norm": 4.929886341094971, "learning_rate": 2.4560344827586207e-06, "loss": 0.2911, "step": 157625 }, { "epoch": 1.55, "grad_norm": 6.851956844329834, "learning_rate": 2.455603448275862e-06, "loss": 0.1568, "step": 157650 }, { "epoch": 1.55, "grad_norm": 7.141958713531494, "learning_rate": 2.4551724137931036e-06, "loss": 0.3134, "step": 157675 }, { "epoch": 1.55, "grad_norm": 7.610015392303467, "learning_rate": 2.454741379310345e-06, "loss": 0.1537, "step": 157700 }, { "epoch": 1.55, "grad_norm": 4.816158771514893, "learning_rate": 2.4543103448275865e-06, "loss": 0.2283, "step": 157725 }, { "epoch": 1.55, "grad_norm": 11.41617488861084, "learning_rate": 2.4538793103448276e-06, "loss": 0.1634, "step": 157750 }, { "epoch": 1.55, "grad_norm": 4.488197326660156, "learning_rate": 2.453448275862069e-06, "loss": 0.3259, "step": 157775 }, { "epoch": 1.55, "grad_norm": 14.087953567504883, "learning_rate": 2.4530172413793105e-06, "loss": 0.1487, "step": 157800 }, { "epoch": 1.55, "grad_norm": 4.529416084289551, "learning_rate": 2.452586206896552e-06, "loss": 0.2459, "step": 157825 }, { "epoch": 1.55, "grad_norm": 13.020464897155762, "learning_rate": 2.4521551724137934e-06, "loss": 0.1274, "step": 157850 }, { "epoch": 1.55, "grad_norm": 4.659538745880127, "learning_rate": 2.451724137931035e-06, "loss": 0.269, "step": 157875 }, { "epoch": 1.55, "grad_norm": 6.021526336669922, "learning_rate": 2.4512931034482763e-06, "loss": 0.1324, "step": 157900 }, { "epoch": 1.55, "grad_norm": 3.543569803237915, "learning_rate": 2.4508620689655173e-06, "loss": 0.2542, "step": 157925 }, { "epoch": 1.55, "grad_norm": 6.8548784255981445, "learning_rate": 2.450431034482759e-06, "loss": 0.1499, "step": 157950 }, { "epoch": 1.55, "grad_norm": 6.115527153015137, "learning_rate": 2.4500000000000003e-06, "loss": 0.3093, "step": 157975 }, { "epoch": 1.55, "grad_norm": 6.058300971984863, "learning_rate": 2.4495689655172417e-06, "loss": 0.192, "step": 158000 }, { "epoch": 1.55, "grad_norm": 3.763888120651245, "learning_rate": 2.4491379310344827e-06, "loss": 0.3056, "step": 158025 }, { "epoch": 1.55, "grad_norm": 11.988862037658691, "learning_rate": 2.448706896551724e-06, "loss": 0.1274, "step": 158050 }, { "epoch": 1.55, "grad_norm": 3.4516165256500244, "learning_rate": 2.4482758620689657e-06, "loss": 0.2698, "step": 158075 }, { "epoch": 1.55, "grad_norm": 7.535016059875488, "learning_rate": 2.447844827586207e-06, "loss": 0.1399, "step": 158100 }, { "epoch": 1.56, "grad_norm": 3.535776376724243, "learning_rate": 2.447413793103448e-06, "loss": 0.2999, "step": 158125 }, { "epoch": 1.56, "grad_norm": 16.45380973815918, "learning_rate": 2.44698275862069e-06, "loss": 0.164, "step": 158150 }, { "epoch": 1.56, "grad_norm": 4.155396461486816, "learning_rate": 2.4465517241379315e-06, "loss": 0.2662, "step": 158175 }, { "epoch": 1.56, "grad_norm": 6.587138652801514, "learning_rate": 2.4461206896551725e-06, "loss": 0.1416, "step": 158200 }, { "epoch": 1.56, "grad_norm": 3.9983367919921875, "learning_rate": 2.445689655172414e-06, "loss": 0.2881, "step": 158225 }, { "epoch": 1.56, "grad_norm": 42.02151870727539, "learning_rate": 2.4452586206896554e-06, "loss": 0.1248, "step": 158250 }, { "epoch": 1.56, "grad_norm": 4.309420108795166, "learning_rate": 2.444827586206897e-06, "loss": 0.2558, "step": 158275 }, { "epoch": 1.56, "grad_norm": 6.356505870819092, "learning_rate": 2.444396551724138e-06, "loss": 0.1528, "step": 158300 }, { "epoch": 1.56, "grad_norm": 3.907864809036255, "learning_rate": 2.4439655172413794e-06, "loss": 0.3152, "step": 158325 }, { "epoch": 1.56, "grad_norm": 10.834966659545898, "learning_rate": 2.443534482758621e-06, "loss": 0.1538, "step": 158350 }, { "epoch": 1.56, "grad_norm": 13.69097900390625, "learning_rate": 2.4431034482758623e-06, "loss": 0.2412, "step": 158375 }, { "epoch": 1.56, "grad_norm": 3.1934711933135986, "learning_rate": 2.4426724137931037e-06, "loss": 0.1451, "step": 158400 }, { "epoch": 1.56, "grad_norm": 3.92370343208313, "learning_rate": 2.442241379310345e-06, "loss": 0.3074, "step": 158425 }, { "epoch": 1.56, "grad_norm": 55.03175735473633, "learning_rate": 2.4418103448275867e-06, "loss": 0.1718, "step": 158450 }, { "epoch": 1.56, "grad_norm": 6.426151275634766, "learning_rate": 2.4413793103448277e-06, "loss": 0.2774, "step": 158475 }, { "epoch": 1.56, "grad_norm": 2.955700635910034, "learning_rate": 2.440948275862069e-06, "loss": 0.1676, "step": 158500 }, { "epoch": 1.56, "grad_norm": 3.115729570388794, "learning_rate": 2.4405172413793106e-06, "loss": 0.2291, "step": 158525 }, { "epoch": 1.56, "grad_norm": 10.895526885986328, "learning_rate": 2.440086206896552e-06, "loss": 0.1525, "step": 158550 }, { "epoch": 1.56, "grad_norm": 4.929636478424072, "learning_rate": 2.439655172413793e-06, "loss": 0.3268, "step": 158575 }, { "epoch": 1.56, "grad_norm": 36.00612258911133, "learning_rate": 2.4392241379310346e-06, "loss": 0.2085, "step": 158600 }, { "epoch": 1.56, "grad_norm": 4.288180351257324, "learning_rate": 2.438793103448276e-06, "loss": 0.3204, "step": 158625 }, { "epoch": 1.56, "grad_norm": 8.356977462768555, "learning_rate": 2.4383620689655175e-06, "loss": 0.1621, "step": 158650 }, { "epoch": 1.56, "grad_norm": 3.7578608989715576, "learning_rate": 2.437931034482759e-06, "loss": 0.2628, "step": 158675 }, { "epoch": 1.56, "grad_norm": 4.563823223114014, "learning_rate": 2.4375e-06, "loss": 0.1307, "step": 158700 }, { "epoch": 1.56, "grad_norm": 9.189638137817383, "learning_rate": 2.437068965517242e-06, "loss": 0.2996, "step": 158725 }, { "epoch": 1.56, "grad_norm": 7.701683521270752, "learning_rate": 2.436637931034483e-06, "loss": 0.1464, "step": 158750 }, { "epoch": 1.56, "grad_norm": 4.231593132019043, "learning_rate": 2.4362068965517243e-06, "loss": 0.3009, "step": 158775 }, { "epoch": 1.56, "grad_norm": 5.889426231384277, "learning_rate": 2.4357758620689658e-06, "loss": 0.1304, "step": 158800 }, { "epoch": 1.56, "grad_norm": 4.03782320022583, "learning_rate": 2.4353448275862072e-06, "loss": 0.3364, "step": 158825 }, { "epoch": 1.56, "grad_norm": 12.423930168151855, "learning_rate": 2.4349137931034487e-06, "loss": 0.1507, "step": 158850 }, { "epoch": 1.56, "grad_norm": 5.540905475616455, "learning_rate": 2.4344827586206897e-06, "loss": 0.2655, "step": 158875 }, { "epoch": 1.56, "grad_norm": 7.820760250091553, "learning_rate": 2.434051724137931e-06, "loss": 0.1607, "step": 158900 }, { "epoch": 1.56, "grad_norm": 4.819976329803467, "learning_rate": 2.4336206896551726e-06, "loss": 0.282, "step": 158925 }, { "epoch": 1.56, "grad_norm": 8.811040878295898, "learning_rate": 2.433189655172414e-06, "loss": 0.1203, "step": 158950 }, { "epoch": 1.56, "grad_norm": 6.021961688995361, "learning_rate": 2.432758620689655e-06, "loss": 0.3165, "step": 158975 }, { "epoch": 1.56, "grad_norm": 8.549330711364746, "learning_rate": 2.4323275862068966e-06, "loss": 0.1401, "step": 159000 }, { "epoch": 1.56, "grad_norm": 4.069479465484619, "learning_rate": 2.431896551724138e-06, "loss": 0.3119, "step": 159025 }, { "epoch": 1.56, "grad_norm": 7.030287265777588, "learning_rate": 2.4314655172413795e-06, "loss": 0.1651, "step": 159050 }, { "epoch": 1.56, "grad_norm": 4.135372161865234, "learning_rate": 2.431034482758621e-06, "loss": 0.2549, "step": 159075 }, { "epoch": 1.56, "grad_norm": 8.853992462158203, "learning_rate": 2.4306034482758624e-06, "loss": 0.187, "step": 159100 }, { "epoch": 1.57, "grad_norm": 4.623022079467773, "learning_rate": 2.430172413793104e-06, "loss": 0.2934, "step": 159125 }, { "epoch": 1.57, "grad_norm": 11.995946884155273, "learning_rate": 2.429741379310345e-06, "loss": 0.1542, "step": 159150 }, { "epoch": 1.57, "grad_norm": 5.231561183929443, "learning_rate": 2.4293103448275864e-06, "loss": 0.2963, "step": 159175 }, { "epoch": 1.57, "grad_norm": 5.7826337814331055, "learning_rate": 2.428879310344828e-06, "loss": 0.1109, "step": 159200 }, { "epoch": 1.57, "grad_norm": 5.386949062347412, "learning_rate": 2.4284482758620693e-06, "loss": 0.2713, "step": 159225 }, { "epoch": 1.57, "grad_norm": 15.36548900604248, "learning_rate": 2.4280172413793103e-06, "loss": 0.1391, "step": 159250 }, { "epoch": 1.57, "grad_norm": 6.936609268188477, "learning_rate": 2.4275862068965518e-06, "loss": 0.3208, "step": 159275 }, { "epoch": 1.57, "grad_norm": 7.324231147766113, "learning_rate": 2.4271551724137932e-06, "loss": 0.1682, "step": 159300 }, { "epoch": 1.57, "grad_norm": 3.7694551944732666, "learning_rate": 2.4267241379310347e-06, "loss": 0.2612, "step": 159325 }, { "epoch": 1.57, "grad_norm": 8.966459274291992, "learning_rate": 2.426293103448276e-06, "loss": 0.1294, "step": 159350 }, { "epoch": 1.57, "grad_norm": 4.644000053405762, "learning_rate": 2.4258620689655176e-06, "loss": 0.2218, "step": 159375 }, { "epoch": 1.57, "grad_norm": 7.154940605163574, "learning_rate": 2.425431034482759e-06, "loss": 0.1442, "step": 159400 }, { "epoch": 1.57, "grad_norm": 5.106271266937256, "learning_rate": 2.425e-06, "loss": 0.2964, "step": 159425 }, { "epoch": 1.57, "grad_norm": 8.693132400512695, "learning_rate": 2.4245689655172415e-06, "loss": 0.1452, "step": 159450 }, { "epoch": 1.57, "grad_norm": 3.5293188095092773, "learning_rate": 2.424137931034483e-06, "loss": 0.2881, "step": 159475 }, { "epoch": 1.57, "grad_norm": 14.185803413391113, "learning_rate": 2.4237068965517244e-06, "loss": 0.119, "step": 159500 }, { "epoch": 1.57, "grad_norm": 4.222503185272217, "learning_rate": 2.4232758620689655e-06, "loss": 0.2214, "step": 159525 }, { "epoch": 1.57, "grad_norm": 7.597829341888428, "learning_rate": 2.422844827586207e-06, "loss": 0.1498, "step": 159550 }, { "epoch": 1.57, "grad_norm": 4.211430072784424, "learning_rate": 2.4224137931034484e-06, "loss": 0.2827, "step": 159575 }, { "epoch": 1.57, "grad_norm": 10.840764045715332, "learning_rate": 2.42198275862069e-06, "loss": 0.1329, "step": 159600 }, { "epoch": 1.57, "grad_norm": 3.6387083530426025, "learning_rate": 2.4215517241379313e-06, "loss": 0.2945, "step": 159625 }, { "epoch": 1.57, "grad_norm": 9.239639282226562, "learning_rate": 2.4211206896551728e-06, "loss": 0.1881, "step": 159650 }, { "epoch": 1.57, "grad_norm": 5.129696846008301, "learning_rate": 2.4206896551724142e-06, "loss": 0.2263, "step": 159675 }, { "epoch": 1.57, "grad_norm": 9.892767906188965, "learning_rate": 2.4202586206896552e-06, "loss": 0.1715, "step": 159700 }, { "epoch": 1.57, "grad_norm": 3.0075221061706543, "learning_rate": 2.4198275862068967e-06, "loss": 0.2922, "step": 159725 }, { "epoch": 1.57, "grad_norm": 5.307960510253906, "learning_rate": 2.419396551724138e-06, "loss": 0.1174, "step": 159750 }, { "epoch": 1.57, "grad_norm": 4.248161315917969, "learning_rate": 2.4189655172413796e-06, "loss": 0.2748, "step": 159775 }, { "epoch": 1.57, "grad_norm": 17.09701919555664, "learning_rate": 2.418534482758621e-06, "loss": 0.1655, "step": 159800 }, { "epoch": 1.57, "grad_norm": 4.422828674316406, "learning_rate": 2.418103448275862e-06, "loss": 0.317, "step": 159825 }, { "epoch": 1.57, "grad_norm": 5.844799041748047, "learning_rate": 2.4176724137931036e-06, "loss": 0.1052, "step": 159850 }, { "epoch": 1.57, "grad_norm": 3.7027037143707275, "learning_rate": 2.417241379310345e-06, "loss": 0.2839, "step": 159875 }, { "epoch": 1.57, "grad_norm": 5.575988292694092, "learning_rate": 2.4168103448275865e-06, "loss": 0.1572, "step": 159900 }, { "epoch": 1.57, "grad_norm": 4.422528266906738, "learning_rate": 2.4163793103448275e-06, "loss": 0.2875, "step": 159925 }, { "epoch": 1.57, "grad_norm": 5.935831069946289, "learning_rate": 2.4159482758620694e-06, "loss": 0.1585, "step": 159950 }, { "epoch": 1.57, "grad_norm": 2.9811222553253174, "learning_rate": 2.4155172413793104e-06, "loss": 0.2688, "step": 159975 }, { "epoch": 1.57, "grad_norm": 6.456181526184082, "learning_rate": 2.415086206896552e-06, "loss": 0.1496, "step": 160000 }, { "epoch": 1.57, "eval_loss": 0.5519216656684875, "eval_runtime": 5761.6493, "eval_samples_per_second": 1.643, "eval_steps_per_second": 0.205, "eval_wer": 0.12632781819352346, "step": 160000 }, { "epoch": 1.57, "grad_norm": 4.574234485626221, "learning_rate": 2.4146551724137933e-06, "loss": 0.2288, "step": 160025 }, { "epoch": 1.57, "grad_norm": 12.530664443969727, "learning_rate": 2.414224137931035e-06, "loss": 0.1244, "step": 160050 }, { "epoch": 1.57, "grad_norm": 8.361808776855469, "learning_rate": 2.4137931034482762e-06, "loss": 0.2478, "step": 160075 }, { "epoch": 1.57, "grad_norm": 6.004552364349365, "learning_rate": 2.4133620689655173e-06, "loss": 0.1309, "step": 160100 }, { "epoch": 1.57, "grad_norm": 4.600777626037598, "learning_rate": 2.4129310344827587e-06, "loss": 0.2605, "step": 160125 }, { "epoch": 1.58, "grad_norm": 8.714323997497559, "learning_rate": 2.4125e-06, "loss": 0.1565, "step": 160150 }, { "epoch": 1.58, "grad_norm": 5.152858734130859, "learning_rate": 2.4120689655172417e-06, "loss": 0.2545, "step": 160175 }, { "epoch": 1.58, "grad_norm": 8.721508026123047, "learning_rate": 2.4116379310344827e-06, "loss": 0.165, "step": 160200 }, { "epoch": 1.58, "grad_norm": 4.881158351898193, "learning_rate": 2.411206896551724e-06, "loss": 0.2921, "step": 160225 }, { "epoch": 1.58, "grad_norm": 6.976289749145508, "learning_rate": 2.4107758620689656e-06, "loss": 0.1139, "step": 160250 }, { "epoch": 1.58, "grad_norm": 4.558287620544434, "learning_rate": 2.410344827586207e-06, "loss": 0.327, "step": 160275 }, { "epoch": 1.58, "grad_norm": 12.10889720916748, "learning_rate": 2.4099137931034485e-06, "loss": 0.1576, "step": 160300 }, { "epoch": 1.58, "grad_norm": 4.592746734619141, "learning_rate": 2.40948275862069e-06, "loss": 0.3244, "step": 160325 }, { "epoch": 1.58, "grad_norm": 6.176338195800781, "learning_rate": 2.4090517241379314e-06, "loss": 0.1288, "step": 160350 }, { "epoch": 1.58, "grad_norm": 4.634881019592285, "learning_rate": 2.4086206896551725e-06, "loss": 0.2812, "step": 160375 }, { "epoch": 1.58, "grad_norm": 10.707003593444824, "learning_rate": 2.408189655172414e-06, "loss": 0.1452, "step": 160400 }, { "epoch": 1.58, "grad_norm": 4.504674911499023, "learning_rate": 2.4077586206896554e-06, "loss": 0.2715, "step": 160425 }, { "epoch": 1.58, "grad_norm": 10.954144477844238, "learning_rate": 2.407327586206897e-06, "loss": 0.1328, "step": 160450 }, { "epoch": 1.58, "grad_norm": 7.677804946899414, "learning_rate": 2.406896551724138e-06, "loss": 0.2585, "step": 160475 }, { "epoch": 1.58, "grad_norm": 4.555694103240967, "learning_rate": 2.4064655172413793e-06, "loss": 0.1284, "step": 160500 }, { "epoch": 1.58, "grad_norm": 5.495276927947998, "learning_rate": 2.406034482758621e-06, "loss": 0.2917, "step": 160525 }, { "epoch": 1.58, "grad_norm": 7.6894659996032715, "learning_rate": 2.4056034482758622e-06, "loss": 0.1607, "step": 160550 }, { "epoch": 1.58, "grad_norm": 4.391872882843018, "learning_rate": 2.4051724137931037e-06, "loss": 0.2511, "step": 160575 }, { "epoch": 1.58, "grad_norm": 9.303547859191895, "learning_rate": 2.404741379310345e-06, "loss": 0.1602, "step": 160600 }, { "epoch": 1.58, "grad_norm": 4.136455535888672, "learning_rate": 2.4043103448275866e-06, "loss": 0.2662, "step": 160625 }, { "epoch": 1.58, "grad_norm": 7.839024543762207, "learning_rate": 2.4038793103448276e-06, "loss": 0.1458, "step": 160650 }, { "epoch": 1.58, "grad_norm": 5.637340068817139, "learning_rate": 2.403448275862069e-06, "loss": 0.2928, "step": 160675 }, { "epoch": 1.58, "grad_norm": 7.803966522216797, "learning_rate": 2.4030172413793105e-06, "loss": 0.1197, "step": 160700 }, { "epoch": 1.58, "grad_norm": 3.9792113304138184, "learning_rate": 2.402586206896552e-06, "loss": 0.2906, "step": 160725 }, { "epoch": 1.58, "grad_norm": 12.620719909667969, "learning_rate": 2.4021551724137935e-06, "loss": 0.1457, "step": 160750 }, { "epoch": 1.58, "grad_norm": 3.7629992961883545, "learning_rate": 2.4017241379310345e-06, "loss": 0.2672, "step": 160775 }, { "epoch": 1.58, "grad_norm": 6.996427059173584, "learning_rate": 2.401293103448276e-06, "loss": 0.149, "step": 160800 }, { "epoch": 1.58, "grad_norm": 5.051892280578613, "learning_rate": 2.4008620689655174e-06, "loss": 0.2991, "step": 160825 }, { "epoch": 1.58, "grad_norm": 8.009376525878906, "learning_rate": 2.400431034482759e-06, "loss": 0.1589, "step": 160850 }, { "epoch": 1.58, "grad_norm": 4.653388023376465, "learning_rate": 2.4000000000000003e-06, "loss": 0.2924, "step": 160875 }, { "epoch": 1.58, "grad_norm": 2.0242888927459717, "learning_rate": 2.3995689655172418e-06, "loss": 0.1372, "step": 160900 }, { "epoch": 1.58, "grad_norm": 4.979043006896973, "learning_rate": 2.399137931034483e-06, "loss": 0.3185, "step": 160925 }, { "epoch": 1.58, "grad_norm": 8.474146842956543, "learning_rate": 2.3987068965517243e-06, "loss": 0.1578, "step": 160950 }, { "epoch": 1.58, "grad_norm": 4.8551344871521, "learning_rate": 2.3982758620689657e-06, "loss": 0.305, "step": 160975 }, { "epoch": 1.58, "grad_norm": 11.057112693786621, "learning_rate": 2.397844827586207e-06, "loss": 0.1512, "step": 161000 }, { "epoch": 1.58, "grad_norm": 4.0567216873168945, "learning_rate": 2.3974137931034486e-06, "loss": 0.3185, "step": 161025 }, { "epoch": 1.58, "grad_norm": 8.652941703796387, "learning_rate": 2.3969827586206897e-06, "loss": 0.1383, "step": 161050 }, { "epoch": 1.58, "grad_norm": 5.407463550567627, "learning_rate": 2.3965689655172415e-06, "loss": 0.2844, "step": 161075 }, { "epoch": 1.58, "grad_norm": 10.964673042297363, "learning_rate": 2.396137931034483e-06, "loss": 0.1282, "step": 161100 }, { "epoch": 1.58, "grad_norm": 6.083632469177246, "learning_rate": 2.3957068965517245e-06, "loss": 0.3143, "step": 161125 }, { "epoch": 1.58, "grad_norm": 5.606424808502197, "learning_rate": 2.3952758620689655e-06, "loss": 0.133, "step": 161150 }, { "epoch": 1.59, "grad_norm": 5.837875843048096, "learning_rate": 2.394844827586207e-06, "loss": 0.2769, "step": 161175 }, { "epoch": 1.59, "grad_norm": 8.692421913146973, "learning_rate": 2.3944137931034484e-06, "loss": 0.158, "step": 161200 }, { "epoch": 1.59, "grad_norm": 3.7676053047180176, "learning_rate": 2.39398275862069e-06, "loss": 0.3003, "step": 161225 }, { "epoch": 1.59, "grad_norm": 2.643803358078003, "learning_rate": 2.3935517241379313e-06, "loss": 0.1328, "step": 161250 }, { "epoch": 1.59, "grad_norm": 5.970462799072266, "learning_rate": 2.3931206896551728e-06, "loss": 0.2693, "step": 161275 }, { "epoch": 1.59, "grad_norm": 7.265668869018555, "learning_rate": 2.3926896551724142e-06, "loss": 0.1705, "step": 161300 }, { "epoch": 1.59, "grad_norm": 3.4931657314300537, "learning_rate": 2.3922586206896553e-06, "loss": 0.2545, "step": 161325 }, { "epoch": 1.59, "grad_norm": 4.757575988769531, "learning_rate": 2.3918275862068967e-06, "loss": 0.1463, "step": 161350 }, { "epoch": 1.59, "grad_norm": 4.643684387207031, "learning_rate": 2.391396551724138e-06, "loss": 0.316, "step": 161375 }, { "epoch": 1.59, "grad_norm": 8.297353744506836, "learning_rate": 2.3909655172413796e-06, "loss": 0.1435, "step": 161400 }, { "epoch": 1.59, "grad_norm": 3.835099458694458, "learning_rate": 2.3905344827586207e-06, "loss": 0.2756, "step": 161425 }, { "epoch": 1.59, "grad_norm": 6.331716537475586, "learning_rate": 2.390103448275862e-06, "loss": 0.1721, "step": 161450 }, { "epoch": 1.59, "grad_norm": 4.435879707336426, "learning_rate": 2.3896724137931036e-06, "loss": 0.2817, "step": 161475 }, { "epoch": 1.59, "grad_norm": 10.032418251037598, "learning_rate": 2.389241379310345e-06, "loss": 0.1697, "step": 161500 }, { "epoch": 1.59, "grad_norm": 4.2694244384765625, "learning_rate": 2.3888103448275865e-06, "loss": 0.2842, "step": 161525 }, { "epoch": 1.59, "grad_norm": 12.970083236694336, "learning_rate": 2.3883793103448275e-06, "loss": 0.1597, "step": 161550 }, { "epoch": 1.59, "grad_norm": 6.566347122192383, "learning_rate": 2.3879482758620694e-06, "loss": 0.3058, "step": 161575 }, { "epoch": 1.59, "grad_norm": 5.893970489501953, "learning_rate": 2.3875172413793104e-06, "loss": 0.1771, "step": 161600 }, { "epoch": 1.59, "grad_norm": 5.185180187225342, "learning_rate": 2.387086206896552e-06, "loss": 0.29, "step": 161625 }, { "epoch": 1.59, "grad_norm": 9.7623872756958, "learning_rate": 2.3866551724137934e-06, "loss": 0.1739, "step": 161650 }, { "epoch": 1.59, "grad_norm": 4.03873348236084, "learning_rate": 2.386224137931035e-06, "loss": 0.3249, "step": 161675 }, { "epoch": 1.59, "grad_norm": 4.591697692871094, "learning_rate": 2.3857931034482763e-06, "loss": 0.1702, "step": 161700 }, { "epoch": 1.59, "grad_norm": 4.583368301391602, "learning_rate": 2.3853620689655173e-06, "loss": 0.2281, "step": 161725 }, { "epoch": 1.59, "grad_norm": 8.350494384765625, "learning_rate": 2.3849310344827588e-06, "loss": 0.146, "step": 161750 }, { "epoch": 1.59, "grad_norm": 4.233597755432129, "learning_rate": 2.3845e-06, "loss": 0.2688, "step": 161775 }, { "epoch": 1.59, "grad_norm": 12.622090339660645, "learning_rate": 2.3840689655172417e-06, "loss": 0.1569, "step": 161800 }, { "epoch": 1.59, "grad_norm": 5.438325881958008, "learning_rate": 2.3836379310344827e-06, "loss": 0.2806, "step": 161825 }, { "epoch": 1.59, "grad_norm": 11.727277755737305, "learning_rate": 2.3832068965517246e-06, "loss": 0.1494, "step": 161850 }, { "epoch": 1.59, "grad_norm": 4.484537124633789, "learning_rate": 2.3827758620689656e-06, "loss": 0.2971, "step": 161875 }, { "epoch": 1.59, "grad_norm": 10.073542594909668, "learning_rate": 2.382344827586207e-06, "loss": 0.1555, "step": 161900 }, { "epoch": 1.59, "grad_norm": 3.7517952919006348, "learning_rate": 2.3819137931034485e-06, "loss": 0.3138, "step": 161925 }, { "epoch": 1.59, "grad_norm": 2.5692198276519775, "learning_rate": 2.38148275862069e-06, "loss": 0.1272, "step": 161950 }, { "epoch": 1.59, "grad_norm": 3.6155056953430176, "learning_rate": 2.3810517241379314e-06, "loss": 0.2707, "step": 161975 }, { "epoch": 1.59, "grad_norm": 11.089386940002441, "learning_rate": 2.3806206896551725e-06, "loss": 0.1362, "step": 162000 }, { "epoch": 1.59, "grad_norm": 4.3737359046936035, "learning_rate": 2.380189655172414e-06, "loss": 0.3099, "step": 162025 }, { "epoch": 1.59, "grad_norm": 3.0931506156921387, "learning_rate": 2.3797586206896554e-06, "loss": 0.1317, "step": 162050 }, { "epoch": 1.59, "grad_norm": 5.7095794677734375, "learning_rate": 2.379327586206897e-06, "loss": 0.3147, "step": 162075 }, { "epoch": 1.59, "grad_norm": 3.322399139404297, "learning_rate": 2.378896551724138e-06, "loss": 0.1536, "step": 162100 }, { "epoch": 1.59, "grad_norm": 3.4982786178588867, "learning_rate": 2.3784655172413793e-06, "loss": 0.2982, "step": 162125 }, { "epoch": 1.59, "grad_norm": 8.509293556213379, "learning_rate": 2.3780344827586208e-06, "loss": 0.1182, "step": 162150 }, { "epoch": 1.6, "grad_norm": 4.068574905395508, "learning_rate": 2.3776034482758622e-06, "loss": 0.2665, "step": 162175 }, { "epoch": 1.6, "grad_norm": 5.703447341918945, "learning_rate": 2.3771724137931037e-06, "loss": 0.1763, "step": 162200 }, { "epoch": 1.6, "grad_norm": 3.115495204925537, "learning_rate": 2.376741379310345e-06, "loss": 0.2987, "step": 162225 }, { "epoch": 1.6, "grad_norm": 8.025434494018555, "learning_rate": 2.3763103448275866e-06, "loss": 0.147, "step": 162250 }, { "epoch": 1.6, "grad_norm": 4.287071704864502, "learning_rate": 2.3758793103448276e-06, "loss": 0.2915, "step": 162275 }, { "epoch": 1.6, "grad_norm": 9.901213645935059, "learning_rate": 2.375448275862069e-06, "loss": 0.1344, "step": 162300 }, { "epoch": 1.6, "grad_norm": 3.604060173034668, "learning_rate": 2.3750172413793106e-06, "loss": 0.287, "step": 162325 }, { "epoch": 1.6, "grad_norm": 7.008869647979736, "learning_rate": 2.374586206896552e-06, "loss": 0.132, "step": 162350 }, { "epoch": 1.6, "grad_norm": 3.4133317470550537, "learning_rate": 2.374155172413793e-06, "loss": 0.2453, "step": 162375 }, { "epoch": 1.6, "grad_norm": 9.233657836914062, "learning_rate": 2.3737241379310345e-06, "loss": 0.1317, "step": 162400 }, { "epoch": 1.6, "grad_norm": 6.596519470214844, "learning_rate": 2.3732931034482764e-06, "loss": 0.2975, "step": 162425 }, { "epoch": 1.6, "grad_norm": 7.9878010749816895, "learning_rate": 2.3728620689655174e-06, "loss": 0.1235, "step": 162450 }, { "epoch": 1.6, "grad_norm": 6.748974323272705, "learning_rate": 2.372431034482759e-06, "loss": 0.3076, "step": 162475 }, { "epoch": 1.6, "grad_norm": 15.048189163208008, "learning_rate": 2.3720000000000003e-06, "loss": 0.124, "step": 162500 }, { "epoch": 1.6, "grad_norm": 3.398390769958496, "learning_rate": 2.3715689655172418e-06, "loss": 0.28, "step": 162525 }, { "epoch": 1.6, "grad_norm": 6.408228874206543, "learning_rate": 2.371137931034483e-06, "loss": 0.1453, "step": 162550 }, { "epoch": 1.6, "grad_norm": 4.473638534545898, "learning_rate": 2.3707068965517243e-06, "loss": 0.2859, "step": 162575 }, { "epoch": 1.6, "grad_norm": 11.215982437133789, "learning_rate": 2.3702758620689657e-06, "loss": 0.1464, "step": 162600 }, { "epoch": 1.6, "grad_norm": 4.650009632110596, "learning_rate": 2.369844827586207e-06, "loss": 0.3448, "step": 162625 }, { "epoch": 1.6, "grad_norm": 5.398932933807373, "learning_rate": 2.3694137931034486e-06, "loss": 0.1422, "step": 162650 }, { "epoch": 1.6, "grad_norm": 5.326776504516602, "learning_rate": 2.3689827586206897e-06, "loss": 0.292, "step": 162675 }, { "epoch": 1.6, "grad_norm": 4.3135175704956055, "learning_rate": 2.368551724137931e-06, "loss": 0.1537, "step": 162700 }, { "epoch": 1.6, "grad_norm": 4.24411153793335, "learning_rate": 2.3681206896551726e-06, "loss": 0.2851, "step": 162725 }, { "epoch": 1.6, "grad_norm": 3.2528269290924072, "learning_rate": 2.367689655172414e-06, "loss": 0.1127, "step": 162750 }, { "epoch": 1.6, "grad_norm": 5.316163539886475, "learning_rate": 2.367258620689655e-06, "loss": 0.2657, "step": 162775 }, { "epoch": 1.6, "grad_norm": 11.437804222106934, "learning_rate": 2.366827586206897e-06, "loss": 0.1701, "step": 162800 }, { "epoch": 1.6, "grad_norm": 4.42639684677124, "learning_rate": 2.366396551724138e-06, "loss": 0.2633, "step": 162825 }, { "epoch": 1.6, "grad_norm": 14.111173629760742, "learning_rate": 2.3659655172413794e-06, "loss": 0.1717, "step": 162850 }, { "epoch": 1.6, "grad_norm": 3.898540496826172, "learning_rate": 2.365534482758621e-06, "loss": 0.2715, "step": 162875 }, { "epoch": 1.6, "grad_norm": 7.934629917144775, "learning_rate": 2.3651034482758624e-06, "loss": 0.1349, "step": 162900 }, { "epoch": 1.6, "grad_norm": 6.34152364730835, "learning_rate": 2.364672413793104e-06, "loss": 0.2805, "step": 162925 }, { "epoch": 1.6, "grad_norm": 2.7983438968658447, "learning_rate": 2.364241379310345e-06, "loss": 0.148, "step": 162950 }, { "epoch": 1.6, "grad_norm": 4.420037269592285, "learning_rate": 2.3638103448275863e-06, "loss": 0.2749, "step": 162975 }, { "epoch": 1.6, "grad_norm": 11.372244834899902, "learning_rate": 2.3633793103448278e-06, "loss": 0.1687, "step": 163000 }, { "epoch": 1.6, "grad_norm": 4.562470436096191, "learning_rate": 2.3629482758620692e-06, "loss": 0.28, "step": 163025 }, { "epoch": 1.6, "grad_norm": 9.941333770751953, "learning_rate": 2.3625172413793103e-06, "loss": 0.1461, "step": 163050 }, { "epoch": 1.6, "grad_norm": 4.202615261077881, "learning_rate": 2.362086206896552e-06, "loss": 0.2621, "step": 163075 }, { "epoch": 1.6, "grad_norm": 11.069938659667969, "learning_rate": 2.361655172413793e-06, "loss": 0.1362, "step": 163100 }, { "epoch": 1.6, "grad_norm": 4.682162761688232, "learning_rate": 2.361241379310345e-06, "loss": 0.266, "step": 163125 }, { "epoch": 1.6, "grad_norm": 8.750456809997559, "learning_rate": 2.3608103448275865e-06, "loss": 0.1644, "step": 163150 }, { "epoch": 1.6, "grad_norm": 5.826968193054199, "learning_rate": 2.360379310344828e-06, "loss": 0.2981, "step": 163175 }, { "epoch": 1.61, "grad_norm": 11.929795265197754, "learning_rate": 2.3599482758620694e-06, "loss": 0.1619, "step": 163200 }, { "epoch": 1.61, "grad_norm": 6.686549186706543, "learning_rate": 2.3595172413793105e-06, "loss": 0.2999, "step": 163225 }, { "epoch": 1.61, "grad_norm": 4.945944309234619, "learning_rate": 2.359086206896552e-06, "loss": 0.1353, "step": 163250 }, { "epoch": 1.61, "grad_norm": 6.463858127593994, "learning_rate": 2.3586551724137934e-06, "loss": 0.2568, "step": 163275 }, { "epoch": 1.61, "grad_norm": 3.835947036743164, "learning_rate": 2.358224137931035e-06, "loss": 0.128, "step": 163300 }, { "epoch": 1.61, "grad_norm": 5.205028533935547, "learning_rate": 2.357793103448276e-06, "loss": 0.2408, "step": 163325 }, { "epoch": 1.61, "grad_norm": 6.039506435394287, "learning_rate": 2.3573620689655173e-06, "loss": 0.1154, "step": 163350 }, { "epoch": 1.61, "grad_norm": 4.5353193283081055, "learning_rate": 2.3569310344827588e-06, "loss": 0.3151, "step": 163375 }, { "epoch": 1.61, "grad_norm": 7.633225917816162, "learning_rate": 2.3565000000000002e-06, "loss": 0.1587, "step": 163400 }, { "epoch": 1.61, "grad_norm": 4.7711100578308105, "learning_rate": 2.3560689655172417e-06, "loss": 0.2736, "step": 163425 }, { "epoch": 1.61, "grad_norm": 10.170652389526367, "learning_rate": 2.3556379310344827e-06, "loss": 0.1437, "step": 163450 }, { "epoch": 1.61, "grad_norm": 5.607334136962891, "learning_rate": 2.3552068965517246e-06, "loss": 0.2909, "step": 163475 }, { "epoch": 1.61, "grad_norm": 12.324041366577148, "learning_rate": 2.3547758620689656e-06, "loss": 0.1042, "step": 163500 }, { "epoch": 1.61, "grad_norm": 3.7740519046783447, "learning_rate": 2.354344827586207e-06, "loss": 0.2793, "step": 163525 }, { "epoch": 1.61, "grad_norm": 11.073596000671387, "learning_rate": 2.3539137931034485e-06, "loss": 0.1565, "step": 163550 }, { "epoch": 1.61, "grad_norm": 4.830742835998535, "learning_rate": 2.35348275862069e-06, "loss": 0.2895, "step": 163575 }, { "epoch": 1.61, "grad_norm": 6.2364583015441895, "learning_rate": 2.353051724137931e-06, "loss": 0.1581, "step": 163600 }, { "epoch": 1.61, "grad_norm": 4.820451259613037, "learning_rate": 2.3526206896551725e-06, "loss": 0.2774, "step": 163625 }, { "epoch": 1.61, "grad_norm": 9.09289836883545, "learning_rate": 2.352189655172414e-06, "loss": 0.1208, "step": 163650 }, { "epoch": 1.61, "grad_norm": 7.033134937286377, "learning_rate": 2.3517586206896554e-06, "loss": 0.2834, "step": 163675 }, { "epoch": 1.61, "grad_norm": 7.046032428741455, "learning_rate": 2.351327586206897e-06, "loss": 0.1237, "step": 163700 }, { "epoch": 1.61, "grad_norm": 5.540129661560059, "learning_rate": 2.350896551724138e-06, "loss": 0.2636, "step": 163725 }, { "epoch": 1.61, "grad_norm": 8.762335777282715, "learning_rate": 2.3504655172413798e-06, "loss": 0.1606, "step": 163750 }, { "epoch": 1.61, "grad_norm": 3.3289759159088135, "learning_rate": 2.350034482758621e-06, "loss": 0.334, "step": 163775 }, { "epoch": 1.61, "grad_norm": 9.356043815612793, "learning_rate": 2.3496034482758623e-06, "loss": 0.1135, "step": 163800 }, { "epoch": 1.61, "grad_norm": 3.7226688861846924, "learning_rate": 2.3491724137931037e-06, "loss": 0.2524, "step": 163825 }, { "epoch": 1.61, "grad_norm": 11.62671184539795, "learning_rate": 2.348741379310345e-06, "loss": 0.1535, "step": 163850 }, { "epoch": 1.61, "grad_norm": 3.6612966060638428, "learning_rate": 2.3483103448275866e-06, "loss": 0.2553, "step": 163875 }, { "epoch": 1.61, "grad_norm": 7.403380393981934, "learning_rate": 2.3478793103448277e-06, "loss": 0.1415, "step": 163900 }, { "epoch": 1.61, "grad_norm": 4.251217842102051, "learning_rate": 2.347448275862069e-06, "loss": 0.2554, "step": 163925 }, { "epoch": 1.61, "grad_norm": 7.0489373207092285, "learning_rate": 2.3470172413793106e-06, "loss": 0.1665, "step": 163950 }, { "epoch": 1.61, "grad_norm": 4.777754306793213, "learning_rate": 2.346586206896552e-06, "loss": 0.3056, "step": 163975 }, { "epoch": 1.61, "grad_norm": 8.217026710510254, "learning_rate": 2.346155172413793e-06, "loss": 0.1653, "step": 164000 }, { "epoch": 1.61, "grad_norm": 3.7223575115203857, "learning_rate": 2.3457241379310345e-06, "loss": 0.2739, "step": 164025 }, { "epoch": 1.61, "grad_norm": 8.730725288391113, "learning_rate": 2.345293103448276e-06, "loss": 0.1294, "step": 164050 }, { "epoch": 1.61, "grad_norm": 5.461477279663086, "learning_rate": 2.3448620689655174e-06, "loss": 0.2746, "step": 164075 }, { "epoch": 1.61, "grad_norm": 10.789887428283691, "learning_rate": 2.344431034482759e-06, "loss": 0.1645, "step": 164100 }, { "epoch": 1.61, "grad_norm": 3.729872465133667, "learning_rate": 2.3440000000000003e-06, "loss": 0.2542, "step": 164125 }, { "epoch": 1.61, "grad_norm": 7.634957790374756, "learning_rate": 2.343568965517242e-06, "loss": 0.1393, "step": 164150 }, { "epoch": 1.61, "grad_norm": 5.210673809051514, "learning_rate": 2.343137931034483e-06, "loss": 0.3039, "step": 164175 }, { "epoch": 1.61, "grad_norm": 7.689409255981445, "learning_rate": 2.3427068965517243e-06, "loss": 0.1512, "step": 164200 }, { "epoch": 1.62, "grad_norm": 5.203684329986572, "learning_rate": 2.3422758620689657e-06, "loss": 0.2349, "step": 164225 }, { "epoch": 1.62, "grad_norm": 7.266146659851074, "learning_rate": 2.341844827586207e-06, "loss": 0.1598, "step": 164250 }, { "epoch": 1.62, "grad_norm": 3.840083599090576, "learning_rate": 2.3414137931034482e-06, "loss": 0.2606, "step": 164275 }, { "epoch": 1.62, "grad_norm": 3.0365779399871826, "learning_rate": 2.3409827586206897e-06, "loss": 0.1655, "step": 164300 }, { "epoch": 1.62, "grad_norm": 3.903576135635376, "learning_rate": 2.340551724137931e-06, "loss": 0.248, "step": 164325 }, { "epoch": 1.62, "grad_norm": 13.434453964233398, "learning_rate": 2.3401206896551726e-06, "loss": 0.1306, "step": 164350 }, { "epoch": 1.62, "grad_norm": 5.071486473083496, "learning_rate": 2.339689655172414e-06, "loss": 0.2835, "step": 164375 }, { "epoch": 1.62, "grad_norm": 7.308156490325928, "learning_rate": 2.3392586206896555e-06, "loss": 0.1308, "step": 164400 }, { "epoch": 1.62, "grad_norm": 5.632904529571533, "learning_rate": 2.338827586206897e-06, "loss": 0.2553, "step": 164425 }, { "epoch": 1.62, "grad_norm": 13.148077011108398, "learning_rate": 2.338396551724138e-06, "loss": 0.1438, "step": 164450 }, { "epoch": 1.62, "grad_norm": 4.91331148147583, "learning_rate": 2.3379655172413795e-06, "loss": 0.2981, "step": 164475 }, { "epoch": 1.62, "grad_norm": 12.685977935791016, "learning_rate": 2.337534482758621e-06, "loss": 0.1826, "step": 164500 }, { "epoch": 1.62, "grad_norm": 6.134115695953369, "learning_rate": 2.3371034482758624e-06, "loss": 0.3111, "step": 164525 }, { "epoch": 1.62, "grad_norm": 7.594655990600586, "learning_rate": 2.336672413793104e-06, "loss": 0.1367, "step": 164550 }, { "epoch": 1.62, "grad_norm": 4.992905616760254, "learning_rate": 2.336241379310345e-06, "loss": 0.3114, "step": 164575 }, { "epoch": 1.62, "grad_norm": 9.557958602905273, "learning_rate": 2.3358103448275863e-06, "loss": 0.1477, "step": 164600 }, { "epoch": 1.62, "grad_norm": 5.130951404571533, "learning_rate": 2.3353793103448278e-06, "loss": 0.2489, "step": 164625 }, { "epoch": 1.62, "grad_norm": 10.651235580444336, "learning_rate": 2.3349482758620692e-06, "loss": 0.1144, "step": 164650 }, { "epoch": 1.62, "grad_norm": 4.9391188621521, "learning_rate": 2.3345172413793103e-06, "loss": 0.2302, "step": 164675 }, { "epoch": 1.62, "grad_norm": 12.970412254333496, "learning_rate": 2.334086206896552e-06, "loss": 0.1142, "step": 164700 }, { "epoch": 1.62, "grad_norm": 4.189625263214111, "learning_rate": 2.333655172413793e-06, "loss": 0.2138, "step": 164725 }, { "epoch": 1.62, "grad_norm": 4.1250739097595215, "learning_rate": 2.3332241379310346e-06, "loss": 0.1435, "step": 164750 }, { "epoch": 1.62, "grad_norm": 4.906681060791016, "learning_rate": 2.332793103448276e-06, "loss": 0.2767, "step": 164775 }, { "epoch": 1.62, "grad_norm": 12.485664367675781, "learning_rate": 2.3323620689655175e-06, "loss": 0.165, "step": 164800 }, { "epoch": 1.62, "grad_norm": 4.648014545440674, "learning_rate": 2.331931034482759e-06, "loss": 0.2308, "step": 164825 }, { "epoch": 1.62, "grad_norm": 5.712649345397949, "learning_rate": 2.3315e-06, "loss": 0.1301, "step": 164850 }, { "epoch": 1.62, "grad_norm": 11.185371398925781, "learning_rate": 2.3310689655172415e-06, "loss": 0.2794, "step": 164875 }, { "epoch": 1.62, "grad_norm": 10.082165718078613, "learning_rate": 2.330637931034483e-06, "loss": 0.1837, "step": 164900 }, { "epoch": 1.62, "grad_norm": 6.0869574546813965, "learning_rate": 2.3302068965517244e-06, "loss": 0.2759, "step": 164925 }, { "epoch": 1.62, "grad_norm": 6.976579189300537, "learning_rate": 2.3297758620689654e-06, "loss": 0.1455, "step": 164950 }, { "epoch": 1.62, "grad_norm": 5.084634780883789, "learning_rate": 2.3293448275862073e-06, "loss": 0.3084, "step": 164975 }, { "epoch": 1.62, "grad_norm": 4.461662292480469, "learning_rate": 2.3289137931034484e-06, "loss": 0.1463, "step": 165000 }, { "epoch": 1.62, "grad_norm": 4.939239501953125, "learning_rate": 2.32848275862069e-06, "loss": 0.2804, "step": 165025 }, { "epoch": 1.62, "grad_norm": 8.372267723083496, "learning_rate": 2.3280517241379313e-06, "loss": 0.1216, "step": 165050 }, { "epoch": 1.62, "grad_norm": 2.758716106414795, "learning_rate": 2.3276206896551727e-06, "loss": 0.1828, "step": 165075 }, { "epoch": 1.62, "grad_norm": 9.335328102111816, "learning_rate": 2.327189655172414e-06, "loss": 0.1777, "step": 165100 }, { "epoch": 1.62, "grad_norm": 4.179744243621826, "learning_rate": 2.3267586206896552e-06, "loss": 0.2288, "step": 165125 }, { "epoch": 1.62, "grad_norm": 4.481169700622559, "learning_rate": 2.3263275862068967e-06, "loss": 0.1544, "step": 165150 }, { "epoch": 1.62, "grad_norm": 4.373037338256836, "learning_rate": 2.325896551724138e-06, "loss": 0.1976, "step": 165175 }, { "epoch": 1.62, "grad_norm": 7.352179527282715, "learning_rate": 2.3254655172413796e-06, "loss": 0.1564, "step": 165200 }, { "epoch": 1.63, "grad_norm": 6.831669807434082, "learning_rate": 2.325051724137931e-06, "loss": 0.3017, "step": 165225 }, { "epoch": 1.63, "grad_norm": 13.334851264953613, "learning_rate": 2.3246206896551725e-06, "loss": 0.1371, "step": 165250 }, { "epoch": 1.63, "grad_norm": 5.871110916137695, "learning_rate": 2.324189655172414e-06, "loss": 0.3428, "step": 165275 }, { "epoch": 1.63, "grad_norm": 5.423585891723633, "learning_rate": 2.3237586206896554e-06, "loss": 0.1517, "step": 165300 }, { "epoch": 1.63, "grad_norm": 15.83281135559082, "learning_rate": 2.323327586206897e-06, "loss": 0.2673, "step": 165325 }, { "epoch": 1.63, "grad_norm": 9.673688888549805, "learning_rate": 2.322896551724138e-06, "loss": 0.148, "step": 165350 }, { "epoch": 1.63, "grad_norm": 3.9149444103240967, "learning_rate": 2.3224655172413798e-06, "loss": 0.3969, "step": 165375 }, { "epoch": 1.63, "grad_norm": 7.403891086578369, "learning_rate": 2.322034482758621e-06, "loss": 0.1473, "step": 165400 }, { "epoch": 1.63, "grad_norm": 6.133029460906982, "learning_rate": 2.3216034482758623e-06, "loss": 0.2574, "step": 165425 }, { "epoch": 1.63, "grad_norm": 9.605982780456543, "learning_rate": 2.3211724137931037e-06, "loss": 0.1292, "step": 165450 }, { "epoch": 1.63, "grad_norm": 4.31483268737793, "learning_rate": 2.320741379310345e-06, "loss": 0.2757, "step": 165475 }, { "epoch": 1.63, "grad_norm": 8.155007362365723, "learning_rate": 2.3203103448275862e-06, "loss": 0.1357, "step": 165500 }, { "epoch": 1.63, "grad_norm": 4.713790416717529, "learning_rate": 2.3198793103448277e-06, "loss": 0.3105, "step": 165525 }, { "epoch": 1.63, "grad_norm": 6.843106269836426, "learning_rate": 2.319448275862069e-06, "loss": 0.1434, "step": 165550 }, { "epoch": 1.63, "grad_norm": 4.434688091278076, "learning_rate": 2.3190172413793106e-06, "loss": 0.2873, "step": 165575 }, { "epoch": 1.63, "grad_norm": 10.526558876037598, "learning_rate": 2.318586206896552e-06, "loss": 0.1444, "step": 165600 }, { "epoch": 1.63, "grad_norm": 6.411110877990723, "learning_rate": 2.318155172413793e-06, "loss": 0.275, "step": 165625 }, { "epoch": 1.63, "grad_norm": 13.138484001159668, "learning_rate": 2.3177241379310345e-06, "loss": 0.1262, "step": 165650 }, { "epoch": 1.63, "grad_norm": 3.5427916049957275, "learning_rate": 2.317293103448276e-06, "loss": 0.2507, "step": 165675 }, { "epoch": 1.63, "grad_norm": 38.98367691040039, "learning_rate": 2.3168620689655174e-06, "loss": 0.1474, "step": 165700 }, { "epoch": 1.63, "grad_norm": 3.9395065307617188, "learning_rate": 2.316431034482759e-06, "loss": 0.2473, "step": 165725 }, { "epoch": 1.63, "grad_norm": 12.351822853088379, "learning_rate": 2.3160000000000004e-06, "loss": 0.1659, "step": 165750 }, { "epoch": 1.63, "grad_norm": 4.081538677215576, "learning_rate": 2.315568965517242e-06, "loss": 0.2394, "step": 165775 }, { "epoch": 1.63, "grad_norm": 11.463614463806152, "learning_rate": 2.315137931034483e-06, "loss": 0.1805, "step": 165800 }, { "epoch": 1.63, "grad_norm": 11.078624725341797, "learning_rate": 2.3147068965517243e-06, "loss": 0.2986, "step": 165825 }, { "epoch": 1.63, "grad_norm": 16.25439453125, "learning_rate": 2.3142758620689658e-06, "loss": 0.1586, "step": 165850 }, { "epoch": 1.63, "grad_norm": 4.315237045288086, "learning_rate": 2.3138448275862072e-06, "loss": 0.3135, "step": 165875 }, { "epoch": 1.63, "grad_norm": 7.86172342300415, "learning_rate": 2.3134137931034482e-06, "loss": 0.1577, "step": 165900 }, { "epoch": 1.63, "grad_norm": 4.749332904815674, "learning_rate": 2.3129827586206897e-06, "loss": 0.2764, "step": 165925 }, { "epoch": 1.63, "grad_norm": 5.183067798614502, "learning_rate": 2.312551724137931e-06, "loss": 0.1158, "step": 165950 }, { "epoch": 1.63, "grad_norm": 3.9294610023498535, "learning_rate": 2.3121206896551726e-06, "loss": 0.278, "step": 165975 }, { "epoch": 1.63, "grad_norm": 7.6639084815979, "learning_rate": 2.311689655172414e-06, "loss": 0.112, "step": 166000 }, { "epoch": 1.63, "grad_norm": 4.842315673828125, "learning_rate": 2.3112586206896555e-06, "loss": 0.2884, "step": 166025 }, { "epoch": 1.63, "grad_norm": 11.195229530334473, "learning_rate": 2.310827586206897e-06, "loss": 0.1273, "step": 166050 }, { "epoch": 1.63, "grad_norm": 4.471065044403076, "learning_rate": 2.310396551724138e-06, "loss": 0.2617, "step": 166075 }, { "epoch": 1.63, "grad_norm": 2.1628520488739014, "learning_rate": 2.3099655172413795e-06, "loss": 0.1386, "step": 166100 }, { "epoch": 1.63, "grad_norm": 4.731081008911133, "learning_rate": 2.309534482758621e-06, "loss": 0.2495, "step": 166125 }, { "epoch": 1.63, "grad_norm": 4.511196136474609, "learning_rate": 2.3091034482758624e-06, "loss": 0.1291, "step": 166150 }, { "epoch": 1.63, "grad_norm": 4.27744722366333, "learning_rate": 2.3086724137931034e-06, "loss": 0.2684, "step": 166175 }, { "epoch": 1.63, "grad_norm": 14.584598541259766, "learning_rate": 2.308241379310345e-06, "loss": 0.178, "step": 166200 }, { "epoch": 1.63, "grad_norm": 5.3842315673828125, "learning_rate": 2.3078103448275863e-06, "loss": 0.2615, "step": 166225 }, { "epoch": 1.64, "grad_norm": 9.440625190734863, "learning_rate": 2.307379310344828e-06, "loss": 0.1751, "step": 166250 }, { "epoch": 1.64, "grad_norm": 4.491339683532715, "learning_rate": 2.3069482758620692e-06, "loss": 0.3227, "step": 166275 }, { "epoch": 1.64, "grad_norm": 10.411723136901855, "learning_rate": 2.3065172413793103e-06, "loss": 0.1344, "step": 166300 }, { "epoch": 1.64, "grad_norm": 3.708813428878784, "learning_rate": 2.306086206896552e-06, "loss": 0.318, "step": 166325 }, { "epoch": 1.64, "grad_norm": 5.786497116088867, "learning_rate": 2.305655172413793e-06, "loss": 0.124, "step": 166350 }, { "epoch": 1.64, "grad_norm": 4.4033284187316895, "learning_rate": 2.3052241379310347e-06, "loss": 0.289, "step": 166375 }, { "epoch": 1.64, "grad_norm": 12.526390075683594, "learning_rate": 2.304793103448276e-06, "loss": 0.1605, "step": 166400 }, { "epoch": 1.64, "grad_norm": 3.826849937438965, "learning_rate": 2.3043620689655176e-06, "loss": 0.2697, "step": 166425 }, { "epoch": 1.64, "grad_norm": 9.971390724182129, "learning_rate": 2.3039310344827586e-06, "loss": 0.1287, "step": 166450 }, { "epoch": 1.64, "grad_norm": 4.348424434661865, "learning_rate": 2.3035e-06, "loss": 0.2968, "step": 166475 }, { "epoch": 1.64, "grad_norm": 9.669129371643066, "learning_rate": 2.3030689655172415e-06, "loss": 0.1646, "step": 166500 }, { "epoch": 1.64, "grad_norm": 4.810392379760742, "learning_rate": 2.302637931034483e-06, "loss": 0.2906, "step": 166525 }, { "epoch": 1.64, "grad_norm": 13.659046173095703, "learning_rate": 2.3022068965517244e-06, "loss": 0.1443, "step": 166550 }, { "epoch": 1.64, "grad_norm": 3.174440860748291, "learning_rate": 2.3017758620689655e-06, "loss": 0.31, "step": 166575 }, { "epoch": 1.64, "grad_norm": 0.7614631652832031, "learning_rate": 2.3013448275862073e-06, "loss": 0.11, "step": 166600 }, { "epoch": 1.64, "grad_norm": 4.037200450897217, "learning_rate": 2.3009137931034484e-06, "loss": 0.3136, "step": 166625 }, { "epoch": 1.64, "grad_norm": 8.420721054077148, "learning_rate": 2.30048275862069e-06, "loss": 0.145, "step": 166650 }, { "epoch": 1.64, "grad_norm": 4.635502815246582, "learning_rate": 2.3000517241379313e-06, "loss": 0.359, "step": 166675 }, { "epoch": 1.64, "grad_norm": 10.22412109375, "learning_rate": 2.2996206896551727e-06, "loss": 0.1644, "step": 166700 }, { "epoch": 1.64, "grad_norm": 5.236969947814941, "learning_rate": 2.299189655172414e-06, "loss": 0.2502, "step": 166725 }, { "epoch": 1.64, "grad_norm": 8.554662704467773, "learning_rate": 2.2987586206896552e-06, "loss": 0.1475, "step": 166750 }, { "epoch": 1.64, "grad_norm": 5.360531330108643, "learning_rate": 2.2983275862068967e-06, "loss": 0.288, "step": 166775 }, { "epoch": 1.64, "grad_norm": 8.568504333496094, "learning_rate": 2.297896551724138e-06, "loss": 0.1599, "step": 166800 }, { "epoch": 1.64, "grad_norm": 3.5444655418395996, "learning_rate": 2.2974655172413796e-06, "loss": 0.2904, "step": 166825 }, { "epoch": 1.64, "grad_norm": 11.617025375366211, "learning_rate": 2.2970344827586206e-06, "loss": 0.1399, "step": 166850 }, { "epoch": 1.64, "grad_norm": 6.687685012817383, "learning_rate": 2.296603448275862e-06, "loss": 0.3168, "step": 166875 }, { "epoch": 1.64, "grad_norm": 6.161853790283203, "learning_rate": 2.2961724137931035e-06, "loss": 0.1371, "step": 166900 }, { "epoch": 1.64, "grad_norm": 6.776427745819092, "learning_rate": 2.295741379310345e-06, "loss": 0.275, "step": 166925 }, { "epoch": 1.64, "grad_norm": 5.427211284637451, "learning_rate": 2.2953103448275865e-06, "loss": 0.1347, "step": 166950 }, { "epoch": 1.64, "grad_norm": 4.2857255935668945, "learning_rate": 2.294879310344828e-06, "loss": 0.3072, "step": 166975 }, { "epoch": 1.64, "grad_norm": 8.307514190673828, "learning_rate": 2.2944482758620694e-06, "loss": 0.1512, "step": 167000 }, { "epoch": 1.64, "grad_norm": 6.585513114929199, "learning_rate": 2.2940172413793104e-06, "loss": 0.2292, "step": 167025 }, { "epoch": 1.64, "grad_norm": 4.800861358642578, "learning_rate": 2.293586206896552e-06, "loss": 0.1719, "step": 167050 }, { "epoch": 1.64, "grad_norm": 5.464439392089844, "learning_rate": 2.2931551724137933e-06, "loss": 0.3112, "step": 167075 }, { "epoch": 1.64, "grad_norm": 6.487067699432373, "learning_rate": 2.2927241379310348e-06, "loss": 0.1283, "step": 167100 }, { "epoch": 1.64, "grad_norm": 4.3226542472839355, "learning_rate": 2.292293103448276e-06, "loss": 0.2366, "step": 167125 }, { "epoch": 1.64, "grad_norm": 6.46524715423584, "learning_rate": 2.2918620689655173e-06, "loss": 0.1304, "step": 167150 }, { "epoch": 1.64, "grad_norm": 5.148814678192139, "learning_rate": 2.291431034482759e-06, "loss": 0.2495, "step": 167175 }, { "epoch": 1.64, "grad_norm": 7.143841743469238, "learning_rate": 2.291e-06, "loss": 0.1754, "step": 167200 }, { "epoch": 1.64, "grad_norm": 6.358733654022217, "learning_rate": 2.2905689655172416e-06, "loss": 0.2329, "step": 167225 }, { "epoch": 1.64, "grad_norm": 13.808842658996582, "learning_rate": 2.290137931034483e-06, "loss": 0.192, "step": 167250 }, { "epoch": 1.65, "grad_norm": 4.603672027587891, "learning_rate": 2.289724137931035e-06, "loss": 0.2315, "step": 167275 }, { "epoch": 1.65, "grad_norm": 5.319714069366455, "learning_rate": 2.289293103448276e-06, "loss": 0.1433, "step": 167300 }, { "epoch": 1.65, "grad_norm": 4.856056213378906, "learning_rate": 2.2888620689655175e-06, "loss": 0.2892, "step": 167325 }, { "epoch": 1.65, "grad_norm": 6.5073933601379395, "learning_rate": 2.288431034482759e-06, "loss": 0.0986, "step": 167350 }, { "epoch": 1.65, "grad_norm": 4.041173458099365, "learning_rate": 2.2880000000000004e-06, "loss": 0.2277, "step": 167375 }, { "epoch": 1.65, "grad_norm": 12.837746620178223, "learning_rate": 2.2875689655172414e-06, "loss": 0.1451, "step": 167400 }, { "epoch": 1.65, "grad_norm": 4.379159450531006, "learning_rate": 2.287137931034483e-06, "loss": 0.3116, "step": 167425 }, { "epoch": 1.65, "grad_norm": 12.429922103881836, "learning_rate": 2.2867068965517243e-06, "loss": 0.1387, "step": 167450 }, { "epoch": 1.65, "grad_norm": 3.456242561340332, "learning_rate": 2.2862758620689658e-06, "loss": 0.2671, "step": 167475 }, { "epoch": 1.65, "grad_norm": 9.513569831848145, "learning_rate": 2.2858448275862072e-06, "loss": 0.1863, "step": 167500 }, { "epoch": 1.65, "grad_norm": 3.6498451232910156, "learning_rate": 2.2854137931034483e-06, "loss": 0.3095, "step": 167525 }, { "epoch": 1.65, "grad_norm": 8.4265718460083, "learning_rate": 2.2849827586206897e-06, "loss": 0.1394, "step": 167550 }, { "epoch": 1.65, "grad_norm": 8.221955299377441, "learning_rate": 2.284551724137931e-06, "loss": 0.2444, "step": 167575 }, { "epoch": 1.65, "grad_norm": 6.837437629699707, "learning_rate": 2.2841206896551726e-06, "loss": 0.1571, "step": 167600 }, { "epoch": 1.65, "grad_norm": 6.281114101409912, "learning_rate": 2.2836896551724137e-06, "loss": 0.2913, "step": 167625 }, { "epoch": 1.65, "grad_norm": 6.685856819152832, "learning_rate": 2.2832586206896555e-06, "loss": 0.1652, "step": 167650 }, { "epoch": 1.65, "grad_norm": 6.973714351654053, "learning_rate": 2.282827586206897e-06, "loss": 0.2601, "step": 167675 }, { "epoch": 1.65, "grad_norm": 12.626203536987305, "learning_rate": 2.282396551724138e-06, "loss": 0.1463, "step": 167700 }, { "epoch": 1.65, "grad_norm": 4.240610122680664, "learning_rate": 2.2819655172413795e-06, "loss": 0.2786, "step": 167725 }, { "epoch": 1.65, "grad_norm": 12.748811721801758, "learning_rate": 2.281534482758621e-06, "loss": 0.157, "step": 167750 }, { "epoch": 1.65, "grad_norm": 7.901600360870361, "learning_rate": 2.2811034482758624e-06, "loss": 0.2934, "step": 167775 }, { "epoch": 1.65, "grad_norm": 9.95637035369873, "learning_rate": 2.2806724137931034e-06, "loss": 0.1602, "step": 167800 }, { "epoch": 1.65, "grad_norm": 5.7610979080200195, "learning_rate": 2.280241379310345e-06, "loss": 0.2742, "step": 167825 }, { "epoch": 1.65, "grad_norm": 8.599592208862305, "learning_rate": 2.2798103448275863e-06, "loss": 0.1531, "step": 167850 }, { "epoch": 1.65, "grad_norm": 4.433034420013428, "learning_rate": 2.279379310344828e-06, "loss": 0.2557, "step": 167875 }, { "epoch": 1.65, "grad_norm": 10.095311164855957, "learning_rate": 2.278948275862069e-06, "loss": 0.1682, "step": 167900 }, { "epoch": 1.65, "grad_norm": 4.158813953399658, "learning_rate": 2.2785172413793107e-06, "loss": 0.2615, "step": 167925 }, { "epoch": 1.65, "grad_norm": 6.218344211578369, "learning_rate": 2.278086206896552e-06, "loss": 0.1499, "step": 167950 }, { "epoch": 1.65, "grad_norm": 4.146055221557617, "learning_rate": 2.277655172413793e-06, "loss": 0.2272, "step": 167975 }, { "epoch": 1.65, "grad_norm": 9.862139701843262, "learning_rate": 2.2772241379310347e-06, "loss": 0.1348, "step": 168000 }, { "epoch": 1.65, "grad_norm": 4.544498443603516, "learning_rate": 2.276793103448276e-06, "loss": 0.2674, "step": 168025 }, { "epoch": 1.65, "grad_norm": 10.87419319152832, "learning_rate": 2.2763620689655176e-06, "loss": 0.1458, "step": 168050 }, { "epoch": 1.65, "grad_norm": 5.273707866668701, "learning_rate": 2.2759310344827586e-06, "loss": 0.2673, "step": 168075 }, { "epoch": 1.65, "grad_norm": 7.204365253448486, "learning_rate": 2.2755e-06, "loss": 0.1269, "step": 168100 }, { "epoch": 1.65, "grad_norm": 3.8730697631835938, "learning_rate": 2.2750689655172415e-06, "loss": 0.3079, "step": 168125 }, { "epoch": 1.65, "grad_norm": 4.140575885772705, "learning_rate": 2.274637931034483e-06, "loss": 0.1933, "step": 168150 }, { "epoch": 1.65, "grad_norm": 7.644063472747803, "learning_rate": 2.2742068965517244e-06, "loss": 0.2648, "step": 168175 }, { "epoch": 1.65, "grad_norm": 13.885876655578613, "learning_rate": 2.2737758620689655e-06, "loss": 0.1534, "step": 168200 }, { "epoch": 1.65, "grad_norm": 6.602691173553467, "learning_rate": 2.2733448275862073e-06, "loss": 0.2821, "step": 168225 }, { "epoch": 1.65, "grad_norm": 10.037129402160645, "learning_rate": 2.2729137931034484e-06, "loss": 0.1062, "step": 168250 }, { "epoch": 1.66, "grad_norm": 3.798879384994507, "learning_rate": 2.27248275862069e-06, "loss": 0.2389, "step": 168275 }, { "epoch": 1.66, "grad_norm": 12.116844177246094, "learning_rate": 2.2720517241379313e-06, "loss": 0.1694, "step": 168300 }, { "epoch": 1.66, "grad_norm": 4.723820686340332, "learning_rate": 2.2716206896551728e-06, "loss": 0.265, "step": 168325 }, { "epoch": 1.66, "grad_norm": 10.116640090942383, "learning_rate": 2.2711896551724138e-06, "loss": 0.167, "step": 168350 }, { "epoch": 1.66, "grad_norm": 3.8255903720855713, "learning_rate": 2.2707586206896552e-06, "loss": 0.2825, "step": 168375 }, { "epoch": 1.66, "grad_norm": 7.13722562789917, "learning_rate": 2.2703275862068967e-06, "loss": 0.1484, "step": 168400 }, { "epoch": 1.66, "grad_norm": 5.492502689361572, "learning_rate": 2.269896551724138e-06, "loss": 0.3419, "step": 168425 }, { "epoch": 1.66, "grad_norm": 19.468799591064453, "learning_rate": 2.2694655172413796e-06, "loss": 0.1627, "step": 168450 }, { "epoch": 1.66, "grad_norm": 5.742552280426025, "learning_rate": 2.2690344827586206e-06, "loss": 0.2462, "step": 168475 }, { "epoch": 1.66, "grad_norm": 13.004039764404297, "learning_rate": 2.2686034482758625e-06, "loss": 0.1525, "step": 168500 }, { "epoch": 1.66, "grad_norm": 4.85398006439209, "learning_rate": 2.2681724137931036e-06, "loss": 0.3638, "step": 168525 }, { "epoch": 1.66, "grad_norm": 9.526908874511719, "learning_rate": 2.267741379310345e-06, "loss": 0.1564, "step": 168550 }, { "epoch": 1.66, "grad_norm": 6.053741455078125, "learning_rate": 2.2673103448275865e-06, "loss": 0.2963, "step": 168575 }, { "epoch": 1.66, "grad_norm": 7.954762935638428, "learning_rate": 2.266879310344828e-06, "loss": 0.1336, "step": 168600 }, { "epoch": 1.66, "grad_norm": 5.548492908477783, "learning_rate": 2.2664482758620694e-06, "loss": 0.2436, "step": 168625 }, { "epoch": 1.66, "grad_norm": 4.558068752288818, "learning_rate": 2.2660172413793104e-06, "loss": 0.0974, "step": 168650 }, { "epoch": 1.66, "grad_norm": 5.039231300354004, "learning_rate": 2.265586206896552e-06, "loss": 0.2415, "step": 168675 }, { "epoch": 1.66, "grad_norm": 8.135687828063965, "learning_rate": 2.2651551724137933e-06, "loss": 0.1571, "step": 168700 }, { "epoch": 1.66, "grad_norm": 4.350305557250977, "learning_rate": 2.2647241379310348e-06, "loss": 0.2605, "step": 168725 }, { "epoch": 1.66, "grad_norm": 7.773582458496094, "learning_rate": 2.264293103448276e-06, "loss": 0.1331, "step": 168750 }, { "epoch": 1.66, "grad_norm": 5.082671165466309, "learning_rate": 2.2638620689655173e-06, "loss": 0.2569, "step": 168775 }, { "epoch": 1.66, "grad_norm": 4.483227729797363, "learning_rate": 2.2634310344827587e-06, "loss": 0.1399, "step": 168800 }, { "epoch": 1.66, "grad_norm": 3.185526132583618, "learning_rate": 2.263e-06, "loss": 0.2511, "step": 168825 }, { "epoch": 1.66, "grad_norm": 10.878083229064941, "learning_rate": 2.2625689655172416e-06, "loss": 0.1392, "step": 168850 }, { "epoch": 1.66, "grad_norm": 5.061445713043213, "learning_rate": 2.262137931034483e-06, "loss": 0.2873, "step": 168875 }, { "epoch": 1.66, "grad_norm": 7.199263095855713, "learning_rate": 2.2617068965517246e-06, "loss": 0.1171, "step": 168900 }, { "epoch": 1.66, "grad_norm": 4.549415111541748, "learning_rate": 2.2612758620689656e-06, "loss": 0.3202, "step": 168925 }, { "epoch": 1.66, "grad_norm": 7.8637871742248535, "learning_rate": 2.260844827586207e-06, "loss": 0.1128, "step": 168950 }, { "epoch": 1.66, "grad_norm": 4.328304767608643, "learning_rate": 2.2604137931034485e-06, "loss": 0.2268, "step": 168975 }, { "epoch": 1.66, "grad_norm": 11.684609413146973, "learning_rate": 2.25998275862069e-06, "loss": 0.1418, "step": 169000 }, { "epoch": 1.66, "grad_norm": 4.204935073852539, "learning_rate": 2.259551724137931e-06, "loss": 0.3307, "step": 169025 }, { "epoch": 1.66, "grad_norm": 7.093397617340088, "learning_rate": 2.2591206896551724e-06, "loss": 0.1369, "step": 169050 }, { "epoch": 1.66, "grad_norm": 4.146976947784424, "learning_rate": 2.258689655172414e-06, "loss": 0.2544, "step": 169075 }, { "epoch": 1.66, "grad_norm": 6.252134799957275, "learning_rate": 2.2582586206896554e-06, "loss": 0.1402, "step": 169100 }, { "epoch": 1.66, "grad_norm": 4.607887268066406, "learning_rate": 2.257827586206897e-06, "loss": 0.3162, "step": 169125 }, { "epoch": 1.66, "grad_norm": 4.989339828491211, "learning_rate": 2.2573965517241383e-06, "loss": 0.146, "step": 169150 }, { "epoch": 1.66, "grad_norm": 5.751457214355469, "learning_rate": 2.2569655172413797e-06, "loss": 0.2898, "step": 169175 }, { "epoch": 1.66, "grad_norm": 6.420615196228027, "learning_rate": 2.2565344827586208e-06, "loss": 0.1315, "step": 169200 }, { "epoch": 1.66, "grad_norm": 3.931767463684082, "learning_rate": 2.2561034482758622e-06, "loss": 0.2676, "step": 169225 }, { "epoch": 1.66, "grad_norm": 10.07784652709961, "learning_rate": 2.2556724137931037e-06, "loss": 0.1209, "step": 169250 }, { "epoch": 1.66, "grad_norm": 4.985448837280273, "learning_rate": 2.2552586206896556e-06, "loss": 0.2627, "step": 169275 }, { "epoch": 1.67, "grad_norm": 5.583284854888916, "learning_rate": 2.2548275862068966e-06, "loss": 0.1251, "step": 169300 }, { "epoch": 1.67, "grad_norm": 7.340342998504639, "learning_rate": 2.254396551724138e-06, "loss": 0.302, "step": 169325 }, { "epoch": 1.67, "grad_norm": 8.358713150024414, "learning_rate": 2.2539655172413795e-06, "loss": 0.1215, "step": 169350 }, { "epoch": 1.67, "grad_norm": 3.994584083557129, "learning_rate": 2.253534482758621e-06, "loss": 0.3248, "step": 169375 }, { "epoch": 1.67, "grad_norm": 10.925600051879883, "learning_rate": 2.2531034482758624e-06, "loss": 0.1611, "step": 169400 }, { "epoch": 1.67, "grad_norm": 3.9229319095611572, "learning_rate": 2.2526724137931035e-06, "loss": 0.2915, "step": 169425 }, { "epoch": 1.67, "grad_norm": 11.775981903076172, "learning_rate": 2.252241379310345e-06, "loss": 0.1469, "step": 169450 }, { "epoch": 1.67, "grad_norm": 3.9289844036102295, "learning_rate": 2.2518103448275864e-06, "loss": 0.2504, "step": 169475 }, { "epoch": 1.67, "grad_norm": 5.796089172363281, "learning_rate": 2.251379310344828e-06, "loss": 0.1503, "step": 169500 }, { "epoch": 1.67, "grad_norm": 4.702694416046143, "learning_rate": 2.250948275862069e-06, "loss": 0.2188, "step": 169525 }, { "epoch": 1.67, "grad_norm": 11.430030822753906, "learning_rate": 2.2505172413793107e-06, "loss": 0.1288, "step": 169550 }, { "epoch": 1.67, "grad_norm": 4.476912021636963, "learning_rate": 2.2500862068965518e-06, "loss": 0.3387, "step": 169575 }, { "epoch": 1.67, "grad_norm": 15.94636344909668, "learning_rate": 2.2496551724137932e-06, "loss": 0.1505, "step": 169600 }, { "epoch": 1.67, "grad_norm": 2.9566025733947754, "learning_rate": 2.2492241379310347e-06, "loss": 0.3098, "step": 169625 }, { "epoch": 1.67, "grad_norm": 3.8762612342834473, "learning_rate": 2.248793103448276e-06, "loss": 0.1116, "step": 169650 }, { "epoch": 1.67, "grad_norm": 4.658416271209717, "learning_rate": 2.2483620689655176e-06, "loss": 0.2369, "step": 169675 }, { "epoch": 1.67, "grad_norm": 9.278172492980957, "learning_rate": 2.2479310344827586e-06, "loss": 0.1476, "step": 169700 }, { "epoch": 1.67, "grad_norm": 4.6327290534973145, "learning_rate": 2.2475e-06, "loss": 0.273, "step": 169725 }, { "epoch": 1.67, "grad_norm": 10.075526237487793, "learning_rate": 2.2470689655172415e-06, "loss": 0.1625, "step": 169750 }, { "epoch": 1.67, "grad_norm": 4.404919147491455, "learning_rate": 2.246637931034483e-06, "loss": 0.2934, "step": 169775 }, { "epoch": 1.67, "grad_norm": 4.105140686035156, "learning_rate": 2.246206896551724e-06, "loss": 0.1471, "step": 169800 }, { "epoch": 1.67, "grad_norm": 4.775747299194336, "learning_rate": 2.245775862068966e-06, "loss": 0.2924, "step": 169825 }, { "epoch": 1.67, "grad_norm": 12.547199249267578, "learning_rate": 2.2453448275862074e-06, "loss": 0.1692, "step": 169850 }, { "epoch": 1.67, "grad_norm": 4.159137725830078, "learning_rate": 2.2449137931034484e-06, "loss": 0.2695, "step": 169875 }, { "epoch": 1.67, "grad_norm": 11.817564010620117, "learning_rate": 2.24448275862069e-06, "loss": 0.1875, "step": 169900 }, { "epoch": 1.67, "grad_norm": 4.777662754058838, "learning_rate": 2.2440517241379313e-06, "loss": 0.3505, "step": 169925 }, { "epoch": 1.67, "grad_norm": 9.713570594787598, "learning_rate": 2.2436206896551728e-06, "loss": 0.1358, "step": 169950 }, { "epoch": 1.67, "grad_norm": 7.797518730163574, "learning_rate": 2.243189655172414e-06, "loss": 0.2728, "step": 169975 }, { "epoch": 1.67, "grad_norm": 5.6893439292907715, "learning_rate": 2.2427586206896553e-06, "loss": 0.1091, "step": 170000 }, { "epoch": 1.67, "grad_norm": 3.525437116622925, "learning_rate": 2.2423275862068967e-06, "loss": 0.2934, "step": 170025 }, { "epoch": 1.67, "grad_norm": 9.661835670471191, "learning_rate": 2.241896551724138e-06, "loss": 0.1631, "step": 170050 }, { "epoch": 1.67, "grad_norm": 4.906921863555908, "learning_rate": 2.2414655172413796e-06, "loss": 0.2694, "step": 170075 }, { "epoch": 1.67, "grad_norm": 7.693243026733398, "learning_rate": 2.2410344827586207e-06, "loss": 0.1539, "step": 170100 }, { "epoch": 1.67, "grad_norm": 5.902342319488525, "learning_rate": 2.2406034482758625e-06, "loss": 0.299, "step": 170125 }, { "epoch": 1.67, "grad_norm": 6.67675256729126, "learning_rate": 2.2401724137931036e-06, "loss": 0.1341, "step": 170150 }, { "epoch": 1.67, "grad_norm": 3.5797548294067383, "learning_rate": 2.239741379310345e-06, "loss": 0.241, "step": 170175 }, { "epoch": 1.67, "grad_norm": 5.0334153175354, "learning_rate": 2.2393103448275865e-06, "loss": 0.1817, "step": 170200 }, { "epoch": 1.67, "grad_norm": 6.1799187660217285, "learning_rate": 2.238879310344828e-06, "loss": 0.3162, "step": 170225 }, { "epoch": 1.67, "grad_norm": 6.332712173461914, "learning_rate": 2.238448275862069e-06, "loss": 0.1571, "step": 170250 }, { "epoch": 1.67, "grad_norm": 8.464255332946777, "learning_rate": 2.2380172413793104e-06, "loss": 0.2839, "step": 170275 }, { "epoch": 1.67, "grad_norm": 6.536596298217773, "learning_rate": 2.237586206896552e-06, "loss": 0.1503, "step": 170300 }, { "epoch": 1.68, "grad_norm": 4.34855842590332, "learning_rate": 2.2371551724137933e-06, "loss": 0.3121, "step": 170325 }, { "epoch": 1.68, "grad_norm": 2.5120677947998047, "learning_rate": 2.236724137931035e-06, "loss": 0.1406, "step": 170350 }, { "epoch": 1.68, "grad_norm": 3.954883575439453, "learning_rate": 2.236293103448276e-06, "loss": 0.2628, "step": 170375 }, { "epoch": 1.68, "grad_norm": 10.347868919372559, "learning_rate": 2.2358620689655173e-06, "loss": 0.1596, "step": 170400 }, { "epoch": 1.68, "grad_norm": 4.124727249145508, "learning_rate": 2.2354310344827587e-06, "loss": 0.3056, "step": 170425 }, { "epoch": 1.68, "grad_norm": 5.941477298736572, "learning_rate": 2.235e-06, "loss": 0.1546, "step": 170450 }, { "epoch": 1.68, "grad_norm": 7.363317966461182, "learning_rate": 2.2345689655172417e-06, "loss": 0.2435, "step": 170475 }, { "epoch": 1.68, "grad_norm": 12.262929916381836, "learning_rate": 2.234137931034483e-06, "loss": 0.125, "step": 170500 }, { "epoch": 1.68, "grad_norm": 3.0635292530059814, "learning_rate": 2.2337068965517246e-06, "loss": 0.3087, "step": 170525 }, { "epoch": 1.68, "grad_norm": 4.002910614013672, "learning_rate": 2.2332758620689656e-06, "loss": 0.112, "step": 170550 }, { "epoch": 1.68, "grad_norm": 3.5490732192993164, "learning_rate": 2.232844827586207e-06, "loss": 0.2853, "step": 170575 }, { "epoch": 1.68, "grad_norm": 4.450990676879883, "learning_rate": 2.2324137931034485e-06, "loss": 0.1352, "step": 170600 }, { "epoch": 1.68, "grad_norm": 3.472773313522339, "learning_rate": 2.23198275862069e-06, "loss": 0.3328, "step": 170625 }, { "epoch": 1.68, "grad_norm": 1.8065149784088135, "learning_rate": 2.231551724137931e-06, "loss": 0.1445, "step": 170650 }, { "epoch": 1.68, "grad_norm": 3.827375650405884, "learning_rate": 2.2311206896551725e-06, "loss": 0.2509, "step": 170675 }, { "epoch": 1.68, "grad_norm": 14.705519676208496, "learning_rate": 2.230689655172414e-06, "loss": 0.1402, "step": 170700 }, { "epoch": 1.68, "grad_norm": 4.119476795196533, "learning_rate": 2.2302586206896554e-06, "loss": 0.2727, "step": 170725 }, { "epoch": 1.68, "grad_norm": 10.22771167755127, "learning_rate": 2.2298275862068964e-06, "loss": 0.1399, "step": 170750 }, { "epoch": 1.68, "grad_norm": 4.0611138343811035, "learning_rate": 2.2293965517241383e-06, "loss": 0.2899, "step": 170775 }, { "epoch": 1.68, "grad_norm": 15.812871932983398, "learning_rate": 2.2289655172413797e-06, "loss": 0.1597, "step": 170800 }, { "epoch": 1.68, "grad_norm": 5.234299182891846, "learning_rate": 2.2285344827586208e-06, "loss": 0.3253, "step": 170825 }, { "epoch": 1.68, "grad_norm": 3.944221258163452, "learning_rate": 2.2281034482758622e-06, "loss": 0.1212, "step": 170850 }, { "epoch": 1.68, "grad_norm": 5.104275703430176, "learning_rate": 2.2276724137931037e-06, "loss": 0.241, "step": 170875 }, { "epoch": 1.68, "grad_norm": 10.578239440917969, "learning_rate": 2.227241379310345e-06, "loss": 0.1296, "step": 170900 }, { "epoch": 1.68, "grad_norm": 3.9502811431884766, "learning_rate": 2.226810344827586e-06, "loss": 0.2491, "step": 170925 }, { "epoch": 1.68, "grad_norm": 8.57064151763916, "learning_rate": 2.2263793103448276e-06, "loss": 0.1835, "step": 170950 }, { "epoch": 1.68, "grad_norm": 5.086857795715332, "learning_rate": 2.225948275862069e-06, "loss": 0.2934, "step": 170975 }, { "epoch": 1.68, "grad_norm": 10.90731143951416, "learning_rate": 2.2255172413793105e-06, "loss": 0.1628, "step": 171000 }, { "epoch": 1.68, "grad_norm": 4.76107931137085, "learning_rate": 2.225086206896552e-06, "loss": 0.2529, "step": 171025 }, { "epoch": 1.68, "grad_norm": 14.528185844421387, "learning_rate": 2.2246551724137935e-06, "loss": 0.1469, "step": 171050 }, { "epoch": 1.68, "grad_norm": 8.31441879272461, "learning_rate": 2.224224137931035e-06, "loss": 0.2539, "step": 171075 }, { "epoch": 1.68, "grad_norm": 3.2487313747406006, "learning_rate": 2.223793103448276e-06, "loss": 0.1134, "step": 171100 }, { "epoch": 1.68, "grad_norm": 3.829871892929077, "learning_rate": 2.2233620689655174e-06, "loss": 0.2584, "step": 171125 }, { "epoch": 1.68, "grad_norm": 18.663393020629883, "learning_rate": 2.222931034482759e-06, "loss": 0.1514, "step": 171150 }, { "epoch": 1.68, "grad_norm": 5.398128509521484, "learning_rate": 2.2225000000000003e-06, "loss": 0.2781, "step": 171175 }, { "epoch": 1.68, "grad_norm": 7.421438217163086, "learning_rate": 2.2220689655172414e-06, "loss": 0.1662, "step": 171200 }, { "epoch": 1.68, "grad_norm": 4.629865646362305, "learning_rate": 2.221637931034483e-06, "loss": 0.246, "step": 171225 }, { "epoch": 1.68, "grad_norm": 7.100013256072998, "learning_rate": 2.2212068965517243e-06, "loss": 0.1142, "step": 171250 }, { "epoch": 1.68, "grad_norm": 4.057968616485596, "learning_rate": 2.2207758620689657e-06, "loss": 0.2606, "step": 171275 }, { "epoch": 1.68, "grad_norm": 6.457848072052002, "learning_rate": 2.220344827586207e-06, "loss": 0.1471, "step": 171300 }, { "epoch": 1.69, "grad_norm": 5.733996391296387, "learning_rate": 2.2199137931034482e-06, "loss": 0.2731, "step": 171325 }, { "epoch": 1.69, "grad_norm": 8.786975860595703, "learning_rate": 2.21948275862069e-06, "loss": 0.1314, "step": 171350 }, { "epoch": 1.69, "grad_norm": 5.248635292053223, "learning_rate": 2.219051724137931e-06, "loss": 0.3043, "step": 171375 }, { "epoch": 1.69, "grad_norm": 9.172379493713379, "learning_rate": 2.2186206896551726e-06, "loss": 0.1242, "step": 171400 }, { "epoch": 1.69, "grad_norm": 4.826041221618652, "learning_rate": 2.218206896551724e-06, "loss": 0.2483, "step": 171425 }, { "epoch": 1.69, "grad_norm": 16.104564666748047, "learning_rate": 2.217775862068966e-06, "loss": 0.1768, "step": 171450 }, { "epoch": 1.69, "grad_norm": 3.5858676433563232, "learning_rate": 2.217344827586207e-06, "loss": 0.325, "step": 171475 }, { "epoch": 1.69, "grad_norm": 4.205782413482666, "learning_rate": 2.2169137931034484e-06, "loss": 0.1304, "step": 171500 }, { "epoch": 1.69, "grad_norm": 4.425063133239746, "learning_rate": 2.21648275862069e-06, "loss": 0.3306, "step": 171525 }, { "epoch": 1.69, "grad_norm": 7.164605140686035, "learning_rate": 2.2160517241379313e-06, "loss": 0.1771, "step": 171550 }, { "epoch": 1.69, "grad_norm": 5.085607528686523, "learning_rate": 2.2156206896551728e-06, "loss": 0.2332, "step": 171575 }, { "epoch": 1.69, "grad_norm": 7.983918190002441, "learning_rate": 2.215189655172414e-06, "loss": 0.1608, "step": 171600 }, { "epoch": 1.69, "grad_norm": 4.503843307495117, "learning_rate": 2.2147586206896553e-06, "loss": 0.2207, "step": 171625 }, { "epoch": 1.69, "grad_norm": 6.111893653869629, "learning_rate": 2.2143275862068967e-06, "loss": 0.1361, "step": 171650 }, { "epoch": 1.69, "grad_norm": 11.844966888427734, "learning_rate": 2.213896551724138e-06, "loss": 0.3041, "step": 171675 }, { "epoch": 1.69, "grad_norm": 6.445775032043457, "learning_rate": 2.2134655172413792e-06, "loss": 0.1309, "step": 171700 }, { "epoch": 1.69, "grad_norm": 4.728521347045898, "learning_rate": 2.2130344827586207e-06, "loss": 0.2905, "step": 171725 }, { "epoch": 1.69, "grad_norm": 2.9884190559387207, "learning_rate": 2.2126034482758626e-06, "loss": 0.129, "step": 171750 }, { "epoch": 1.69, "grad_norm": 4.837414264678955, "learning_rate": 2.2121724137931036e-06, "loss": 0.245, "step": 171775 }, { "epoch": 1.69, "grad_norm": 13.433059692382812, "learning_rate": 2.211741379310345e-06, "loss": 0.1303, "step": 171800 }, { "epoch": 1.69, "grad_norm": 3.9884848594665527, "learning_rate": 2.2113103448275865e-06, "loss": 0.2461, "step": 171825 }, { "epoch": 1.69, "grad_norm": 5.621323108673096, "learning_rate": 2.210879310344828e-06, "loss": 0.117, "step": 171850 }, { "epoch": 1.69, "grad_norm": 4.341618061065674, "learning_rate": 2.210448275862069e-06, "loss": 0.2498, "step": 171875 }, { "epoch": 1.69, "grad_norm": 4.117971420288086, "learning_rate": 2.2100172413793104e-06, "loss": 0.1327, "step": 171900 }, { "epoch": 1.69, "grad_norm": 4.655718803405762, "learning_rate": 2.209586206896552e-06, "loss": 0.2425, "step": 171925 }, { "epoch": 1.69, "grad_norm": 5.783575534820557, "learning_rate": 2.2091551724137934e-06, "loss": 0.1497, "step": 171950 }, { "epoch": 1.69, "grad_norm": 4.5076212882995605, "learning_rate": 2.208724137931035e-06, "loss": 0.2907, "step": 171975 }, { "epoch": 1.69, "grad_norm": 8.874913215637207, "learning_rate": 2.208293103448276e-06, "loss": 0.1424, "step": 172000 }, { "epoch": 1.69, "grad_norm": 4.536614418029785, "learning_rate": 2.2078620689655177e-06, "loss": 0.3521, "step": 172025 }, { "epoch": 1.69, "grad_norm": 5.703948497772217, "learning_rate": 2.2074310344827588e-06, "loss": 0.1596, "step": 172050 }, { "epoch": 1.69, "grad_norm": 4.698641300201416, "learning_rate": 2.2070000000000002e-06, "loss": 0.2841, "step": 172075 }, { "epoch": 1.69, "grad_norm": 10.294648170471191, "learning_rate": 2.2065689655172417e-06, "loss": 0.1481, "step": 172100 }, { "epoch": 1.69, "grad_norm": 4.634677886962891, "learning_rate": 2.206137931034483e-06, "loss": 0.2527, "step": 172125 }, { "epoch": 1.69, "grad_norm": 8.052511215209961, "learning_rate": 2.205706896551724e-06, "loss": 0.1294, "step": 172150 }, { "epoch": 1.69, "grad_norm": 4.7844557762146, "learning_rate": 2.2052758620689656e-06, "loss": 0.289, "step": 172175 }, { "epoch": 1.69, "grad_norm": 10.244983673095703, "learning_rate": 2.204844827586207e-06, "loss": 0.1469, "step": 172200 }, { "epoch": 1.69, "grad_norm": 5.188601970672607, "learning_rate": 2.2044137931034485e-06, "loss": 0.305, "step": 172225 }, { "epoch": 1.69, "grad_norm": 10.240974426269531, "learning_rate": 2.20398275862069e-06, "loss": 0.1318, "step": 172250 }, { "epoch": 1.69, "grad_norm": 5.12186861038208, "learning_rate": 2.203551724137931e-06, "loss": 0.2989, "step": 172275 }, { "epoch": 1.69, "grad_norm": 4.380771636962891, "learning_rate": 2.2031206896551725e-06, "loss": 0.1494, "step": 172300 }, { "epoch": 1.69, "grad_norm": 3.3142342567443848, "learning_rate": 2.202689655172414e-06, "loss": 0.2679, "step": 172325 }, { "epoch": 1.7, "grad_norm": 6.213744163513184, "learning_rate": 2.2022586206896554e-06, "loss": 0.1336, "step": 172350 }, { "epoch": 1.7, "grad_norm": 6.223349571228027, "learning_rate": 2.2018275862068964e-06, "loss": 0.2465, "step": 172375 }, { "epoch": 1.7, "grad_norm": 6.962355136871338, "learning_rate": 2.2013965517241383e-06, "loss": 0.124, "step": 172400 }, { "epoch": 1.7, "grad_norm": 5.403753280639648, "learning_rate": 2.2009655172413793e-06, "loss": 0.3373, "step": 172425 }, { "epoch": 1.7, "grad_norm": 4.402721405029297, "learning_rate": 2.200534482758621e-06, "loss": 0.132, "step": 172450 }, { "epoch": 1.7, "grad_norm": 4.55385160446167, "learning_rate": 2.2001034482758622e-06, "loss": 0.2838, "step": 172475 }, { "epoch": 1.7, "grad_norm": 9.997764587402344, "learning_rate": 2.1996724137931037e-06, "loss": 0.1483, "step": 172500 }, { "epoch": 1.7, "grad_norm": 4.929077625274658, "learning_rate": 2.199241379310345e-06, "loss": 0.2376, "step": 172525 }, { "epoch": 1.7, "grad_norm": 7.698583126068115, "learning_rate": 2.198810344827586e-06, "loss": 0.1573, "step": 172550 }, { "epoch": 1.7, "grad_norm": 5.992501258850098, "learning_rate": 2.1983793103448276e-06, "loss": 0.2572, "step": 172575 }, { "epoch": 1.7, "grad_norm": 12.606345176696777, "learning_rate": 2.197948275862069e-06, "loss": 0.1324, "step": 172600 }, { "epoch": 1.7, "grad_norm": 3.132988452911377, "learning_rate": 2.1975172413793106e-06, "loss": 0.2463, "step": 172625 }, { "epoch": 1.7, "grad_norm": 14.319625854492188, "learning_rate": 2.1970862068965516e-06, "loss": 0.1437, "step": 172650 }, { "epoch": 1.7, "grad_norm": 3.8928964138031006, "learning_rate": 2.1966551724137935e-06, "loss": 0.2869, "step": 172675 }, { "epoch": 1.7, "grad_norm": 5.849367618560791, "learning_rate": 2.196224137931035e-06, "loss": 0.1617, "step": 172700 }, { "epoch": 1.7, "grad_norm": 5.368490219116211, "learning_rate": 2.195793103448276e-06, "loss": 0.2657, "step": 172725 }, { "epoch": 1.7, "grad_norm": 7.86400842666626, "learning_rate": 2.1953620689655174e-06, "loss": 0.148, "step": 172750 }, { "epoch": 1.7, "grad_norm": 4.611790657043457, "learning_rate": 2.194931034482759e-06, "loss": 0.2274, "step": 172775 }, { "epoch": 1.7, "grad_norm": 4.910869121551514, "learning_rate": 2.1945000000000003e-06, "loss": 0.1265, "step": 172800 }, { "epoch": 1.7, "grad_norm": 3.549283027648926, "learning_rate": 2.1940689655172414e-06, "loss": 0.3219, "step": 172825 }, { "epoch": 1.7, "grad_norm": 7.701316833496094, "learning_rate": 2.193637931034483e-06, "loss": 0.1327, "step": 172850 }, { "epoch": 1.7, "grad_norm": 4.3836164474487305, "learning_rate": 2.1932068965517243e-06, "loss": 0.2404, "step": 172875 }, { "epoch": 1.7, "grad_norm": 8.983095169067383, "learning_rate": 2.1927758620689657e-06, "loss": 0.1346, "step": 172900 }, { "epoch": 1.7, "grad_norm": 3.8627192974090576, "learning_rate": 2.192344827586207e-06, "loss": 0.2991, "step": 172925 }, { "epoch": 1.7, "grad_norm": 12.245163917541504, "learning_rate": 2.1919137931034482e-06, "loss": 0.1629, "step": 172950 }, { "epoch": 1.7, "grad_norm": 4.566389083862305, "learning_rate": 2.19148275862069e-06, "loss": 0.2712, "step": 172975 }, { "epoch": 1.7, "grad_norm": 6.361252784729004, "learning_rate": 2.191051724137931e-06, "loss": 0.1623, "step": 173000 }, { "epoch": 1.7, "grad_norm": 4.068488597869873, "learning_rate": 2.1906206896551726e-06, "loss": 0.2746, "step": 173025 }, { "epoch": 1.7, "grad_norm": 7.373106956481934, "learning_rate": 2.190189655172414e-06, "loss": 0.1423, "step": 173050 }, { "epoch": 1.7, "grad_norm": 4.4603962898254395, "learning_rate": 2.1897586206896555e-06, "loss": 0.2818, "step": 173075 }, { "epoch": 1.7, "grad_norm": 5.6240386962890625, "learning_rate": 2.1893275862068965e-06, "loss": 0.1368, "step": 173100 }, { "epoch": 1.7, "grad_norm": 5.099551200866699, "learning_rate": 2.188896551724138e-06, "loss": 0.2766, "step": 173125 }, { "epoch": 1.7, "grad_norm": 13.216706275939941, "learning_rate": 2.1884655172413795e-06, "loss": 0.1831, "step": 173150 }, { "epoch": 1.7, "grad_norm": 3.4628379344940186, "learning_rate": 2.188034482758621e-06, "loss": 0.2339, "step": 173175 }, { "epoch": 1.7, "grad_norm": 10.444517135620117, "learning_rate": 2.1876034482758624e-06, "loss": 0.1623, "step": 173200 }, { "epoch": 1.7, "grad_norm": 7.924088001251221, "learning_rate": 2.1871724137931034e-06, "loss": 0.2053, "step": 173225 }, { "epoch": 1.7, "grad_norm": 8.567046165466309, "learning_rate": 2.1867413793103453e-06, "loss": 0.1629, "step": 173250 }, { "epoch": 1.7, "grad_norm": 4.345605373382568, "learning_rate": 2.1863103448275863e-06, "loss": 0.216, "step": 173275 }, { "epoch": 1.7, "grad_norm": 5.49983549118042, "learning_rate": 2.1858793103448278e-06, "loss": 0.1385, "step": 173300 }, { "epoch": 1.7, "grad_norm": 4.896435737609863, "learning_rate": 2.1854482758620692e-06, "loss": 0.289, "step": 173325 }, { "epoch": 1.7, "grad_norm": 9.528237342834473, "learning_rate": 2.1850172413793107e-06, "loss": 0.1205, "step": 173350 }, { "epoch": 1.71, "grad_norm": 5.009296417236328, "learning_rate": 2.1845862068965517e-06, "loss": 0.2977, "step": 173375 }, { "epoch": 1.71, "grad_norm": 6.484594345092773, "learning_rate": 2.184155172413793e-06, "loss": 0.1649, "step": 173400 }, { "epoch": 1.71, "grad_norm": 3.155658006668091, "learning_rate": 2.1837241379310346e-06, "loss": 0.2604, "step": 173425 }, { "epoch": 1.71, "grad_norm": 2.222428560256958, "learning_rate": 2.183293103448276e-06, "loss": 0.1233, "step": 173450 }, { "epoch": 1.71, "grad_norm": 5.509130001068115, "learning_rate": 2.1828620689655175e-06, "loss": 0.2481, "step": 173475 }, { "epoch": 1.71, "grad_norm": 4.194835186004639, "learning_rate": 2.1824310344827586e-06, "loss": 0.1371, "step": 173500 }, { "epoch": 1.71, "grad_norm": 5.563050270080566, "learning_rate": 2.182e-06, "loss": 0.2494, "step": 173525 }, { "epoch": 1.71, "grad_norm": 4.883571147918701, "learning_rate": 2.1815689655172415e-06, "loss": 0.1438, "step": 173550 }, { "epoch": 1.71, "grad_norm": 4.785200119018555, "learning_rate": 2.181137931034483e-06, "loss": 0.2542, "step": 173575 }, { "epoch": 1.71, "grad_norm": 9.276264190673828, "learning_rate": 2.180706896551724e-06, "loss": 0.1539, "step": 173600 }, { "epoch": 1.71, "grad_norm": 3.324932336807251, "learning_rate": 2.180293103448276e-06, "loss": 0.3476, "step": 173625 }, { "epoch": 1.71, "grad_norm": 5.583281993865967, "learning_rate": 2.1798620689655177e-06, "loss": 0.1261, "step": 173650 }, { "epoch": 1.71, "grad_norm": 4.390223503112793, "learning_rate": 2.1794310344827588e-06, "loss": 0.2605, "step": 173675 }, { "epoch": 1.71, "grad_norm": 10.84174633026123, "learning_rate": 2.1790000000000002e-06, "loss": 0.138, "step": 173700 }, { "epoch": 1.71, "grad_norm": 4.773674964904785, "learning_rate": 2.1785689655172417e-06, "loss": 0.3309, "step": 173725 }, { "epoch": 1.71, "grad_norm": 7.37210750579834, "learning_rate": 2.178137931034483e-06, "loss": 0.1211, "step": 173750 }, { "epoch": 1.71, "grad_norm": 4.419183731079102, "learning_rate": 2.177706896551724e-06, "loss": 0.2622, "step": 173775 }, { "epoch": 1.71, "grad_norm": 2.6512465476989746, "learning_rate": 2.1772758620689656e-06, "loss": 0.1454, "step": 173800 }, { "epoch": 1.71, "grad_norm": 4.288012981414795, "learning_rate": 2.176844827586207e-06, "loss": 0.2519, "step": 173825 }, { "epoch": 1.71, "grad_norm": 12.919007301330566, "learning_rate": 2.1764137931034485e-06, "loss": 0.1436, "step": 173850 }, { "epoch": 1.71, "grad_norm": 3.9068119525909424, "learning_rate": 2.1759827586206896e-06, "loss": 0.2546, "step": 173875 }, { "epoch": 1.71, "grad_norm": 9.904935836791992, "learning_rate": 2.175551724137931e-06, "loss": 0.1464, "step": 173900 }, { "epoch": 1.71, "grad_norm": 3.5646398067474365, "learning_rate": 2.1751206896551725e-06, "loss": 0.3027, "step": 173925 }, { "epoch": 1.71, "grad_norm": 6.843653678894043, "learning_rate": 2.174689655172414e-06, "loss": 0.1317, "step": 173950 }, { "epoch": 1.71, "grad_norm": 4.47464656829834, "learning_rate": 2.1742586206896554e-06, "loss": 0.3513, "step": 173975 }, { "epoch": 1.71, "grad_norm": 12.389408111572266, "learning_rate": 2.173827586206897e-06, "loss": 0.147, "step": 174000 }, { "epoch": 1.71, "grad_norm": 3.997957706451416, "learning_rate": 2.1733965517241383e-06, "loss": 0.2832, "step": 174025 }, { "epoch": 1.71, "grad_norm": 7.556267738342285, "learning_rate": 2.1729655172413793e-06, "loss": 0.1637, "step": 174050 }, { "epoch": 1.71, "grad_norm": 4.478454113006592, "learning_rate": 2.172534482758621e-06, "loss": 0.3208, "step": 174075 }, { "epoch": 1.71, "grad_norm": 21.31414222717285, "learning_rate": 2.1721034482758623e-06, "loss": 0.1771, "step": 174100 }, { "epoch": 1.71, "grad_norm": 4.792428016662598, "learning_rate": 2.1716724137931037e-06, "loss": 0.2852, "step": 174125 }, { "epoch": 1.71, "grad_norm": 7.765426158905029, "learning_rate": 2.171241379310345e-06, "loss": 0.1313, "step": 174150 }, { "epoch": 1.71, "grad_norm": 5.449325084686279, "learning_rate": 2.170810344827586e-06, "loss": 0.3407, "step": 174175 }, { "epoch": 1.71, "grad_norm": 10.583352088928223, "learning_rate": 2.1703793103448277e-06, "loss": 0.1209, "step": 174200 }, { "epoch": 1.71, "grad_norm": 4.442976951599121, "learning_rate": 2.169948275862069e-06, "loss": 0.3126, "step": 174225 }, { "epoch": 1.71, "grad_norm": 7.465710163116455, "learning_rate": 2.1695172413793106e-06, "loss": 0.1523, "step": 174250 }, { "epoch": 1.71, "grad_norm": 5.009947776794434, "learning_rate": 2.1690862068965516e-06, "loss": 0.303, "step": 174275 }, { "epoch": 1.71, "grad_norm": 7.804902076721191, "learning_rate": 2.1686551724137935e-06, "loss": 0.1384, "step": 174300 }, { "epoch": 1.71, "grad_norm": 3.8540425300598145, "learning_rate": 2.1682241379310345e-06, "loss": 0.2454, "step": 174325 }, { "epoch": 1.71, "grad_norm": 8.986705780029297, "learning_rate": 2.167793103448276e-06, "loss": 0.1532, "step": 174350 }, { "epoch": 1.72, "grad_norm": 6.272914886474609, "learning_rate": 2.1673620689655174e-06, "loss": 0.2757, "step": 174375 }, { "epoch": 1.72, "grad_norm": 25.696128845214844, "learning_rate": 2.166931034482759e-06, "loss": 0.1477, "step": 174400 }, { "epoch": 1.72, "grad_norm": 3.7096168994903564, "learning_rate": 2.1665000000000003e-06, "loss": 0.2922, "step": 174425 }, { "epoch": 1.72, "grad_norm": 3.3345096111297607, "learning_rate": 2.1660689655172414e-06, "loss": 0.1444, "step": 174450 }, { "epoch": 1.72, "grad_norm": 4.344369411468506, "learning_rate": 2.165637931034483e-06, "loss": 0.2373, "step": 174475 }, { "epoch": 1.72, "grad_norm": 15.071919441223145, "learning_rate": 2.1652068965517243e-06, "loss": 0.1269, "step": 174500 }, { "epoch": 1.72, "grad_norm": 4.602278709411621, "learning_rate": 2.1647758620689658e-06, "loss": 0.2589, "step": 174525 }, { "epoch": 1.72, "grad_norm": 6.534304141998291, "learning_rate": 2.1643448275862068e-06, "loss": 0.1566, "step": 174550 }, { "epoch": 1.72, "grad_norm": 3.815180778503418, "learning_rate": 2.1639137931034487e-06, "loss": 0.3035, "step": 174575 }, { "epoch": 1.72, "grad_norm": 11.592232704162598, "learning_rate": 2.16348275862069e-06, "loss": 0.1554, "step": 174600 }, { "epoch": 1.72, "grad_norm": 5.339054584503174, "learning_rate": 2.163051724137931e-06, "loss": 0.267, "step": 174625 }, { "epoch": 1.72, "grad_norm": 8.81287956237793, "learning_rate": 2.1626206896551726e-06, "loss": 0.137, "step": 174650 }, { "epoch": 1.72, "grad_norm": 2.9374327659606934, "learning_rate": 2.162189655172414e-06, "loss": 0.3326, "step": 174675 }, { "epoch": 1.72, "grad_norm": 6.981296062469482, "learning_rate": 2.1617586206896555e-06, "loss": 0.1385, "step": 174700 }, { "epoch": 1.72, "grad_norm": 4.774716854095459, "learning_rate": 2.1613275862068966e-06, "loss": 0.2517, "step": 174725 }, { "epoch": 1.72, "grad_norm": 15.5559663772583, "learning_rate": 2.160896551724138e-06, "loss": 0.1795, "step": 174750 }, { "epoch": 1.72, "grad_norm": 5.199519157409668, "learning_rate": 2.1604655172413795e-06, "loss": 0.2851, "step": 174775 }, { "epoch": 1.72, "grad_norm": 2.5492467880249023, "learning_rate": 2.160034482758621e-06, "loss": 0.1451, "step": 174800 }, { "epoch": 1.72, "grad_norm": 7.128553867340088, "learning_rate": 2.1596034482758624e-06, "loss": 0.3126, "step": 174825 }, { "epoch": 1.72, "grad_norm": 7.438849925994873, "learning_rate": 2.1591724137931034e-06, "loss": 0.1275, "step": 174850 }, { "epoch": 1.72, "grad_norm": 3.6295082569122314, "learning_rate": 2.1587413793103453e-06, "loss": 0.2733, "step": 174875 }, { "epoch": 1.72, "grad_norm": 7.666846752166748, "learning_rate": 2.1583103448275863e-06, "loss": 0.1574, "step": 174900 }, { "epoch": 1.72, "grad_norm": 3.2526721954345703, "learning_rate": 2.1578793103448278e-06, "loss": 0.2861, "step": 174925 }, { "epoch": 1.72, "grad_norm": 7.68691873550415, "learning_rate": 2.1574482758620692e-06, "loss": 0.14, "step": 174950 }, { "epoch": 1.72, "grad_norm": 4.225228309631348, "learning_rate": 2.1570172413793107e-06, "loss": 0.2713, "step": 174975 }, { "epoch": 1.72, "grad_norm": 15.01956844329834, "learning_rate": 2.1565862068965517e-06, "loss": 0.181, "step": 175000 }, { "epoch": 1.72, "grad_norm": 4.046521186828613, "learning_rate": 2.156155172413793e-06, "loss": 0.225, "step": 175025 }, { "epoch": 1.72, "grad_norm": 6.495962142944336, "learning_rate": 2.1557241379310346e-06, "loss": 0.1317, "step": 175050 }, { "epoch": 1.72, "grad_norm": 5.248398780822754, "learning_rate": 2.155293103448276e-06, "loss": 0.2024, "step": 175075 }, { "epoch": 1.72, "grad_norm": 6.556440830230713, "learning_rate": 2.1548620689655176e-06, "loss": 0.1203, "step": 175100 }, { "epoch": 1.72, "grad_norm": 4.877569198608398, "learning_rate": 2.1544310344827586e-06, "loss": 0.282, "step": 175125 }, { "epoch": 1.72, "grad_norm": 8.159536361694336, "learning_rate": 2.1540000000000005e-06, "loss": 0.171, "step": 175150 }, { "epoch": 1.72, "grad_norm": 3.853026866912842, "learning_rate": 2.1535689655172415e-06, "loss": 0.2697, "step": 175175 }, { "epoch": 1.72, "grad_norm": 11.60149097442627, "learning_rate": 2.153137931034483e-06, "loss": 0.1728, "step": 175200 }, { "epoch": 1.72, "grad_norm": 5.905905246734619, "learning_rate": 2.1527068965517244e-06, "loss": 0.303, "step": 175225 }, { "epoch": 1.72, "grad_norm": 14.227299690246582, "learning_rate": 2.152275862068966e-06, "loss": 0.177, "step": 175250 }, { "epoch": 1.72, "grad_norm": 3.9012715816497803, "learning_rate": 2.151844827586207e-06, "loss": 0.279, "step": 175275 }, { "epoch": 1.72, "grad_norm": 5.45382833480835, "learning_rate": 2.1514137931034484e-06, "loss": 0.1541, "step": 175300 }, { "epoch": 1.72, "grad_norm": 3.052006721496582, "learning_rate": 2.15098275862069e-06, "loss": 0.3181, "step": 175325 }, { "epoch": 1.72, "grad_norm": 18.778263092041016, "learning_rate": 2.1505517241379313e-06, "loss": 0.1563, "step": 175350 }, { "epoch": 1.72, "grad_norm": 4.714706897735596, "learning_rate": 2.1501206896551727e-06, "loss": 0.2784, "step": 175375 }, { "epoch": 1.73, "grad_norm": 6.86438512802124, "learning_rate": 2.1496896551724138e-06, "loss": 0.1475, "step": 175400 }, { "epoch": 1.73, "grad_norm": 4.436662197113037, "learning_rate": 2.1492586206896552e-06, "loss": 0.2757, "step": 175425 }, { "epoch": 1.73, "grad_norm": 9.988724708557129, "learning_rate": 2.1488275862068967e-06, "loss": 0.0987, "step": 175450 }, { "epoch": 1.73, "grad_norm": 6.071200847625732, "learning_rate": 2.148396551724138e-06, "loss": 0.261, "step": 175475 }, { "epoch": 1.73, "grad_norm": 11.538625717163086, "learning_rate": 2.147965517241379e-06, "loss": 0.107, "step": 175500 }, { "epoch": 1.73, "grad_norm": 4.336019515991211, "learning_rate": 2.147534482758621e-06, "loss": 0.2666, "step": 175525 }, { "epoch": 1.73, "grad_norm": 4.65252161026001, "learning_rate": 2.1471034482758625e-06, "loss": 0.1713, "step": 175550 }, { "epoch": 1.73, "grad_norm": 3.1912529468536377, "learning_rate": 2.1466724137931035e-06, "loss": 0.291, "step": 175575 }, { "epoch": 1.73, "grad_norm": 16.91691017150879, "learning_rate": 2.146241379310345e-06, "loss": 0.1511, "step": 175600 }, { "epoch": 1.73, "grad_norm": 5.552131652832031, "learning_rate": 2.145827586206897e-06, "loss": 0.272, "step": 175625 }, { "epoch": 1.73, "grad_norm": 10.069876670837402, "learning_rate": 2.1453965517241383e-06, "loss": 0.1593, "step": 175650 }, { "epoch": 1.73, "grad_norm": 5.2025346755981445, "learning_rate": 2.1449655172413794e-06, "loss": 0.2994, "step": 175675 }, { "epoch": 1.73, "grad_norm": 5.8807268142700195, "learning_rate": 2.144534482758621e-06, "loss": 0.1355, "step": 175700 }, { "epoch": 1.73, "grad_norm": 3.8917794227600098, "learning_rate": 2.1441034482758623e-06, "loss": 0.2662, "step": 175725 }, { "epoch": 1.73, "grad_norm": 8.714868545532227, "learning_rate": 2.1436724137931037e-06, "loss": 0.1333, "step": 175750 }, { "epoch": 1.73, "grad_norm": 4.471154689788818, "learning_rate": 2.1432413793103448e-06, "loss": 0.2487, "step": 175775 }, { "epoch": 1.73, "grad_norm": 11.069679260253906, "learning_rate": 2.1428103448275862e-06, "loss": 0.1179, "step": 175800 }, { "epoch": 1.73, "grad_norm": 3.7990660667419434, "learning_rate": 2.1423793103448277e-06, "loss": 0.2535, "step": 175825 }, { "epoch": 1.73, "grad_norm": 7.308319568634033, "learning_rate": 2.141948275862069e-06, "loss": 0.1582, "step": 175850 }, { "epoch": 1.73, "grad_norm": 3.1055550575256348, "learning_rate": 2.1415172413793106e-06, "loss": 0.2759, "step": 175875 }, { "epoch": 1.73, "grad_norm": 9.58213996887207, "learning_rate": 2.1410862068965516e-06, "loss": 0.1604, "step": 175900 }, { "epoch": 1.73, "grad_norm": 4.804837703704834, "learning_rate": 2.1406551724137935e-06, "loss": 0.2732, "step": 175925 }, { "epoch": 1.73, "grad_norm": 11.780655860900879, "learning_rate": 2.1402241379310345e-06, "loss": 0.1673, "step": 175950 }, { "epoch": 1.73, "grad_norm": 4.875974178314209, "learning_rate": 2.139793103448276e-06, "loss": 0.3109, "step": 175975 }, { "epoch": 1.73, "grad_norm": 7.524520397186279, "learning_rate": 2.1393620689655174e-06, "loss": 0.1401, "step": 176000 }, { "epoch": 1.73, "grad_norm": 4.801839828491211, "learning_rate": 2.138931034482759e-06, "loss": 0.3039, "step": 176025 }, { "epoch": 1.73, "grad_norm": 11.280285835266113, "learning_rate": 2.1385000000000004e-06, "loss": 0.1642, "step": 176050 }, { "epoch": 1.73, "grad_norm": 4.331546306610107, "learning_rate": 2.1380689655172414e-06, "loss": 0.2657, "step": 176075 }, { "epoch": 1.73, "grad_norm": 9.442715644836426, "learning_rate": 2.137637931034483e-06, "loss": 0.1657, "step": 176100 }, { "epoch": 1.73, "grad_norm": 9.3032865524292, "learning_rate": 2.1372068965517243e-06, "loss": 0.2624, "step": 176125 }, { "epoch": 1.73, "grad_norm": 5.727097034454346, "learning_rate": 2.1367758620689658e-06, "loss": 0.1298, "step": 176150 }, { "epoch": 1.73, "grad_norm": 3.7452809810638428, "learning_rate": 2.136344827586207e-06, "loss": 0.2789, "step": 176175 }, { "epoch": 1.73, "grad_norm": 6.605922222137451, "learning_rate": 2.1359137931034487e-06, "loss": 0.0865, "step": 176200 }, { "epoch": 1.73, "grad_norm": 3.8722450733184814, "learning_rate": 2.1354827586206897e-06, "loss": 0.2459, "step": 176225 }, { "epoch": 1.73, "grad_norm": 8.179876327514648, "learning_rate": 2.135051724137931e-06, "loss": 0.1062, "step": 176250 }, { "epoch": 1.73, "grad_norm": 5.378419876098633, "learning_rate": 2.1346206896551726e-06, "loss": 0.2808, "step": 176275 }, { "epoch": 1.73, "grad_norm": 8.576038360595703, "learning_rate": 2.134189655172414e-06, "loss": 0.1318, "step": 176300 }, { "epoch": 1.73, "grad_norm": 5.5587687492370605, "learning_rate": 2.1337586206896555e-06, "loss": 0.2725, "step": 176325 }, { "epoch": 1.73, "grad_norm": 0.8645382523536682, "learning_rate": 2.1333275862068966e-06, "loss": 0.1888, "step": 176350 }, { "epoch": 1.73, "grad_norm": 4.6878509521484375, "learning_rate": 2.132896551724138e-06, "loss": 0.2693, "step": 176375 }, { "epoch": 1.73, "grad_norm": 12.286230087280273, "learning_rate": 2.1324655172413795e-06, "loss": 0.1829, "step": 176400 }, { "epoch": 1.74, "grad_norm": 3.9686279296875, "learning_rate": 2.132034482758621e-06, "loss": 0.2278, "step": 176425 }, { "epoch": 1.74, "grad_norm": 6.9047956466674805, "learning_rate": 2.131603448275862e-06, "loss": 0.1422, "step": 176450 }, { "epoch": 1.74, "grad_norm": 12.881918907165527, "learning_rate": 2.1311724137931034e-06, "loss": 0.2636, "step": 176475 }, { "epoch": 1.74, "grad_norm": 10.5048828125, "learning_rate": 2.1307413793103453e-06, "loss": 0.1525, "step": 176500 }, { "epoch": 1.74, "grad_norm": 5.006164073944092, "learning_rate": 2.1303103448275863e-06, "loss": 0.3196, "step": 176525 }, { "epoch": 1.74, "grad_norm": 20.929922103881836, "learning_rate": 2.129879310344828e-06, "loss": 0.1355, "step": 176550 }, { "epoch": 1.74, "grad_norm": 5.680022716522217, "learning_rate": 2.1294482758620693e-06, "loss": 0.2562, "step": 176575 }, { "epoch": 1.74, "grad_norm": 6.250723838806152, "learning_rate": 2.1290172413793107e-06, "loss": 0.1344, "step": 176600 }, { "epoch": 1.74, "grad_norm": 4.420844078063965, "learning_rate": 2.1285862068965517e-06, "loss": 0.3116, "step": 176625 }, { "epoch": 1.74, "grad_norm": 10.845096588134766, "learning_rate": 2.128155172413793e-06, "loss": 0.1576, "step": 176650 }, { "epoch": 1.74, "grad_norm": 4.2107744216918945, "learning_rate": 2.1277241379310347e-06, "loss": 0.247, "step": 176675 }, { "epoch": 1.74, "grad_norm": 7.77889347076416, "learning_rate": 2.127293103448276e-06, "loss": 0.1185, "step": 176700 }, { "epoch": 1.74, "grad_norm": 4.410569190979004, "learning_rate": 2.126862068965517e-06, "loss": 0.2269, "step": 176725 }, { "epoch": 1.74, "grad_norm": 8.253308296203613, "learning_rate": 2.1264310344827586e-06, "loss": 0.1757, "step": 176750 }, { "epoch": 1.74, "grad_norm": 4.456148624420166, "learning_rate": 2.1260000000000005e-06, "loss": 0.2611, "step": 176775 }, { "epoch": 1.74, "grad_norm": 10.977914810180664, "learning_rate": 2.1255689655172415e-06, "loss": 0.1526, "step": 176800 }, { "epoch": 1.74, "grad_norm": 4.6829986572265625, "learning_rate": 2.125137931034483e-06, "loss": 0.2951, "step": 176825 }, { "epoch": 1.74, "grad_norm": 5.982604026794434, "learning_rate": 2.1247068965517244e-06, "loss": 0.1365, "step": 176850 }, { "epoch": 1.74, "grad_norm": 4.026647090911865, "learning_rate": 2.124275862068966e-06, "loss": 0.2531, "step": 176875 }, { "epoch": 1.74, "grad_norm": 6.28200626373291, "learning_rate": 2.123844827586207e-06, "loss": 0.1396, "step": 176900 }, { "epoch": 1.74, "grad_norm": 4.691135883331299, "learning_rate": 2.1234137931034484e-06, "loss": 0.2382, "step": 176925 }, { "epoch": 1.74, "grad_norm": 15.941593170166016, "learning_rate": 2.12298275862069e-06, "loss": 0.1456, "step": 176950 }, { "epoch": 1.74, "grad_norm": 5.382712364196777, "learning_rate": 2.1225517241379313e-06, "loss": 0.2525, "step": 176975 }, { "epoch": 1.74, "grad_norm": 13.475997924804688, "learning_rate": 2.1221206896551727e-06, "loss": 0.1536, "step": 177000 }, { "epoch": 1.74, "grad_norm": 11.445420265197754, "learning_rate": 2.1216896551724138e-06, "loss": 0.2619, "step": 177025 }, { "epoch": 1.74, "grad_norm": 4.841716289520264, "learning_rate": 2.1212586206896552e-06, "loss": 0.1454, "step": 177050 }, { "epoch": 1.74, "grad_norm": 4.599578380584717, "learning_rate": 2.1208275862068967e-06, "loss": 0.2901, "step": 177075 }, { "epoch": 1.74, "grad_norm": 11.730552673339844, "learning_rate": 2.120396551724138e-06, "loss": 0.1834, "step": 177100 }, { "epoch": 1.74, "grad_norm": 3.396017074584961, "learning_rate": 2.119965517241379e-06, "loss": 0.2835, "step": 177125 }, { "epoch": 1.74, "grad_norm": 6.363265514373779, "learning_rate": 2.119534482758621e-06, "loss": 0.1399, "step": 177150 }, { "epoch": 1.74, "grad_norm": 4.729557991027832, "learning_rate": 2.119103448275862e-06, "loss": 0.2507, "step": 177175 }, { "epoch": 1.74, "grad_norm": 7.869385242462158, "learning_rate": 2.1186724137931035e-06, "loss": 0.146, "step": 177200 }, { "epoch": 1.74, "grad_norm": 5.354244232177734, "learning_rate": 2.118241379310345e-06, "loss": 0.3247, "step": 177225 }, { "epoch": 1.74, "grad_norm": 6.4059906005859375, "learning_rate": 2.1178103448275865e-06, "loss": 0.1451, "step": 177250 }, { "epoch": 1.74, "grad_norm": 5.320493698120117, "learning_rate": 2.117379310344828e-06, "loss": 0.2421, "step": 177275 }, { "epoch": 1.74, "grad_norm": 8.872084617614746, "learning_rate": 2.116948275862069e-06, "loss": 0.1531, "step": 177300 }, { "epoch": 1.74, "grad_norm": 6.043881416320801, "learning_rate": 2.1165172413793104e-06, "loss": 0.276, "step": 177325 }, { "epoch": 1.74, "grad_norm": 6.714780330657959, "learning_rate": 2.116086206896552e-06, "loss": 0.1532, "step": 177350 }, { "epoch": 1.74, "grad_norm": 12.11473274230957, "learning_rate": 2.1156551724137933e-06, "loss": 0.2865, "step": 177375 }, { "epoch": 1.74, "grad_norm": 16.702054977416992, "learning_rate": 2.1152241379310344e-06, "loss": 0.1576, "step": 177400 }, { "epoch": 1.75, "grad_norm": 4.909458160400391, "learning_rate": 2.1147931034482762e-06, "loss": 0.2401, "step": 177425 }, { "epoch": 1.75, "grad_norm": 7.908100128173828, "learning_rate": 2.1143620689655177e-06, "loss": 0.1455, "step": 177450 }, { "epoch": 1.75, "grad_norm": 3.7548141479492188, "learning_rate": 2.1139310344827587e-06, "loss": 0.2652, "step": 177475 }, { "epoch": 1.75, "grad_norm": 9.75304126739502, "learning_rate": 2.1135e-06, "loss": 0.1483, "step": 177500 }, { "epoch": 1.75, "grad_norm": 3.1038570404052734, "learning_rate": 2.1130689655172416e-06, "loss": 0.2539, "step": 177525 }, { "epoch": 1.75, "grad_norm": 15.10435962677002, "learning_rate": 2.112637931034483e-06, "loss": 0.1462, "step": 177550 }, { "epoch": 1.75, "grad_norm": 3.625256299972534, "learning_rate": 2.112206896551724e-06, "loss": 0.2827, "step": 177575 }, { "epoch": 1.75, "grad_norm": 7.168696403503418, "learning_rate": 2.1117758620689656e-06, "loss": 0.1472, "step": 177600 }, { "epoch": 1.75, "grad_norm": 4.66148042678833, "learning_rate": 2.111344827586207e-06, "loss": 0.2652, "step": 177625 }, { "epoch": 1.75, "grad_norm": 7.014576435089111, "learning_rate": 2.1109137931034485e-06, "loss": 0.1418, "step": 177650 }, { "epoch": 1.75, "grad_norm": 5.19673490524292, "learning_rate": 2.1105e-06, "loss": 0.2597, "step": 177675 }, { "epoch": 1.75, "grad_norm": 10.260146141052246, "learning_rate": 2.1100689655172414e-06, "loss": 0.1463, "step": 177700 }, { "epoch": 1.75, "grad_norm": 5.782379627227783, "learning_rate": 2.109637931034483e-06, "loss": 0.2434, "step": 177725 }, { "epoch": 1.75, "grad_norm": 8.114351272583008, "learning_rate": 2.1092068965517243e-06, "loss": 0.1571, "step": 177750 }, { "epoch": 1.75, "grad_norm": 4.637581825256348, "learning_rate": 2.1087758620689658e-06, "loss": 0.2927, "step": 177775 }, { "epoch": 1.75, "grad_norm": 10.024396896362305, "learning_rate": 2.108344827586207e-06, "loss": 0.1429, "step": 177800 }, { "epoch": 1.75, "grad_norm": 4.666243076324463, "learning_rate": 2.1079137931034487e-06, "loss": 0.2618, "step": 177825 }, { "epoch": 1.75, "grad_norm": 6.70332145690918, "learning_rate": 2.1074827586206897e-06, "loss": 0.1628, "step": 177850 }, { "epoch": 1.75, "grad_norm": 5.271124362945557, "learning_rate": 2.107051724137931e-06, "loss": 0.2583, "step": 177875 }, { "epoch": 1.75, "grad_norm": 12.37494945526123, "learning_rate": 2.1066206896551726e-06, "loss": 0.1436, "step": 177900 }, { "epoch": 1.75, "grad_norm": 4.129837989807129, "learning_rate": 2.106189655172414e-06, "loss": 0.3068, "step": 177925 }, { "epoch": 1.75, "grad_norm": 9.785457611083984, "learning_rate": 2.1057586206896556e-06, "loss": 0.1106, "step": 177950 }, { "epoch": 1.75, "grad_norm": 6.288476467132568, "learning_rate": 2.1053275862068966e-06, "loss": 0.2897, "step": 177975 }, { "epoch": 1.75, "grad_norm": 8.778096199035645, "learning_rate": 2.104896551724138e-06, "loss": 0.1685, "step": 178000 }, { "epoch": 1.75, "grad_norm": 4.46769905090332, "learning_rate": 2.1044655172413795e-06, "loss": 0.263, "step": 178025 }, { "epoch": 1.75, "grad_norm": 7.603535175323486, "learning_rate": 2.104034482758621e-06, "loss": 0.156, "step": 178050 }, { "epoch": 1.75, "grad_norm": 5.247137069702148, "learning_rate": 2.103603448275862e-06, "loss": 0.2571, "step": 178075 }, { "epoch": 1.75, "grad_norm": 5.610273838043213, "learning_rate": 2.103172413793104e-06, "loss": 0.1387, "step": 178100 }, { "epoch": 1.75, "grad_norm": 4.469913959503174, "learning_rate": 2.102741379310345e-06, "loss": 0.2751, "step": 178125 }, { "epoch": 1.75, "grad_norm": 7.317266464233398, "learning_rate": 2.1023103448275864e-06, "loss": 0.1202, "step": 178150 }, { "epoch": 1.75, "grad_norm": 6.028749465942383, "learning_rate": 2.101879310344828e-06, "loss": 0.2697, "step": 178175 }, { "epoch": 1.75, "grad_norm": 6.156622409820557, "learning_rate": 2.1014482758620693e-06, "loss": 0.1355, "step": 178200 }, { "epoch": 1.75, "grad_norm": 4.526296615600586, "learning_rate": 2.1010172413793107e-06, "loss": 0.2649, "step": 178225 }, { "epoch": 1.75, "grad_norm": 5.232723712921143, "learning_rate": 2.1005862068965518e-06, "loss": 0.1448, "step": 178250 }, { "epoch": 1.75, "grad_norm": 4.267373561859131, "learning_rate": 2.1001551724137932e-06, "loss": 0.3043, "step": 178275 }, { "epoch": 1.75, "grad_norm": 6.8621063232421875, "learning_rate": 2.0997241379310347e-06, "loss": 0.1392, "step": 178300 }, { "epoch": 1.75, "grad_norm": 4.289266109466553, "learning_rate": 2.099293103448276e-06, "loss": 0.2377, "step": 178325 }, { "epoch": 1.75, "grad_norm": 10.185243606567383, "learning_rate": 2.098862068965517e-06, "loss": 0.15, "step": 178350 }, { "epoch": 1.75, "grad_norm": 3.786388635635376, "learning_rate": 2.0984310344827586e-06, "loss": 0.2689, "step": 178375 }, { "epoch": 1.75, "grad_norm": 11.284969329833984, "learning_rate": 2.098e-06, "loss": 0.1357, "step": 178400 }, { "epoch": 1.75, "grad_norm": 4.80619478225708, "learning_rate": 2.0975689655172415e-06, "loss": 0.2672, "step": 178425 }, { "epoch": 1.76, "grad_norm": 7.337002277374268, "learning_rate": 2.097137931034483e-06, "loss": 0.1371, "step": 178450 }, { "epoch": 1.76, "grad_norm": 4.369341850280762, "learning_rate": 2.0967068965517244e-06, "loss": 0.2226, "step": 178475 }, { "epoch": 1.76, "grad_norm": 6.863418102264404, "learning_rate": 2.096275862068966e-06, "loss": 0.1445, "step": 178500 }, { "epoch": 1.76, "grad_norm": 3.474092483520508, "learning_rate": 2.095844827586207e-06, "loss": 0.258, "step": 178525 }, { "epoch": 1.76, "grad_norm": 7.720891952514648, "learning_rate": 2.0954137931034484e-06, "loss": 0.15, "step": 178550 }, { "epoch": 1.76, "grad_norm": 5.3808207511901855, "learning_rate": 2.09498275862069e-06, "loss": 0.2847, "step": 178575 }, { "epoch": 1.76, "grad_norm": 5.428187847137451, "learning_rate": 2.0945517241379313e-06, "loss": 0.1308, "step": 178600 }, { "epoch": 1.76, "grad_norm": 4.225876808166504, "learning_rate": 2.0941206896551723e-06, "loss": 0.291, "step": 178625 }, { "epoch": 1.76, "grad_norm": 7.006783962249756, "learning_rate": 2.093689655172414e-06, "loss": 0.1907, "step": 178650 }, { "epoch": 1.76, "grad_norm": 4.094475269317627, "learning_rate": 2.0932586206896557e-06, "loss": 0.2498, "step": 178675 }, { "epoch": 1.76, "grad_norm": 10.840503692626953, "learning_rate": 2.0928275862068967e-06, "loss": 0.1555, "step": 178700 }, { "epoch": 1.76, "grad_norm": 4.306607723236084, "learning_rate": 2.092396551724138e-06, "loss": 0.2368, "step": 178725 }, { "epoch": 1.76, "grad_norm": 7.691188812255859, "learning_rate": 2.0919655172413796e-06, "loss": 0.217, "step": 178750 }, { "epoch": 1.76, "grad_norm": 3.905383825302124, "learning_rate": 2.091534482758621e-06, "loss": 0.2586, "step": 178775 }, { "epoch": 1.76, "grad_norm": 7.440792560577393, "learning_rate": 2.091103448275862e-06, "loss": 0.1456, "step": 178800 }, { "epoch": 1.76, "grad_norm": 4.545705318450928, "learning_rate": 2.0906724137931036e-06, "loss": 0.2762, "step": 178825 }, { "epoch": 1.76, "grad_norm": 8.022393226623535, "learning_rate": 2.090241379310345e-06, "loss": 0.1568, "step": 178850 }, { "epoch": 1.76, "grad_norm": 3.923818349838257, "learning_rate": 2.0898103448275865e-06, "loss": 0.2462, "step": 178875 }, { "epoch": 1.76, "grad_norm": 2.979457378387451, "learning_rate": 2.089379310344828e-06, "loss": 0.1533, "step": 178900 }, { "epoch": 1.76, "grad_norm": 5.543943405151367, "learning_rate": 2.088948275862069e-06, "loss": 0.2293, "step": 178925 }, { "epoch": 1.76, "grad_norm": 7.534402847290039, "learning_rate": 2.0885172413793104e-06, "loss": 0.1614, "step": 178950 }, { "epoch": 1.76, "grad_norm": 4.360175609588623, "learning_rate": 2.088086206896552e-06, "loss": 0.2712, "step": 178975 }, { "epoch": 1.76, "grad_norm": 7.862802028656006, "learning_rate": 2.0876551724137933e-06, "loss": 0.1758, "step": 179000 }, { "epoch": 1.76, "grad_norm": 5.228415012359619, "learning_rate": 2.0872241379310344e-06, "loss": 0.2556, "step": 179025 }, { "epoch": 1.76, "grad_norm": 2.7668750286102295, "learning_rate": 2.0867931034482762e-06, "loss": 0.1162, "step": 179050 }, { "epoch": 1.76, "grad_norm": 5.777492523193359, "learning_rate": 2.0863620689655173e-06, "loss": 0.2164, "step": 179075 }, { "epoch": 1.76, "grad_norm": 11.522188186645508, "learning_rate": 2.0859310344827587e-06, "loss": 0.1528, "step": 179100 }, { "epoch": 1.76, "grad_norm": 3.915616989135742, "learning_rate": 2.0855e-06, "loss": 0.2141, "step": 179125 }, { "epoch": 1.76, "grad_norm": 10.197601318359375, "learning_rate": 2.0850689655172416e-06, "loss": 0.1603, "step": 179150 }, { "epoch": 1.76, "grad_norm": 4.83461856842041, "learning_rate": 2.084637931034483e-06, "loss": 0.2949, "step": 179175 }, { "epoch": 1.76, "grad_norm": 14.517616271972656, "learning_rate": 2.084206896551724e-06, "loss": 0.1767, "step": 179200 }, { "epoch": 1.76, "grad_norm": 5.087903022766113, "learning_rate": 2.0837758620689656e-06, "loss": 0.2952, "step": 179225 }, { "epoch": 1.76, "grad_norm": 7.656910419464111, "learning_rate": 2.083344827586207e-06, "loss": 0.1436, "step": 179250 }, { "epoch": 1.76, "grad_norm": 6.8724822998046875, "learning_rate": 2.0829137931034485e-06, "loss": 0.3247, "step": 179275 }, { "epoch": 1.76, "grad_norm": 11.630853652954102, "learning_rate": 2.0824827586206895e-06, "loss": 0.1398, "step": 179300 }, { "epoch": 1.76, "grad_norm": 5.02894401550293, "learning_rate": 2.0820517241379314e-06, "loss": 0.3096, "step": 179325 }, { "epoch": 1.76, "grad_norm": 7.505547523498535, "learning_rate": 2.081620689655173e-06, "loss": 0.1322, "step": 179350 }, { "epoch": 1.76, "grad_norm": 3.953495740890503, "learning_rate": 2.081189655172414e-06, "loss": 0.2742, "step": 179375 }, { "epoch": 1.76, "grad_norm": 6.605223178863525, "learning_rate": 2.0807586206896554e-06, "loss": 0.1321, "step": 179400 }, { "epoch": 1.76, "grad_norm": 4.663348197937012, "learning_rate": 2.080327586206897e-06, "loss": 0.2694, "step": 179425 }, { "epoch": 1.76, "grad_norm": 10.3021821975708, "learning_rate": 2.0798965517241383e-06, "loss": 0.1364, "step": 179450 }, { "epoch": 1.77, "grad_norm": 4.809872627258301, "learning_rate": 2.0794655172413793e-06, "loss": 0.2377, "step": 179475 }, { "epoch": 1.77, "grad_norm": 7.576815128326416, "learning_rate": 2.0790344827586208e-06, "loss": 0.1474, "step": 179500 }, { "epoch": 1.77, "grad_norm": 3.955590009689331, "learning_rate": 2.0786034482758622e-06, "loss": 0.2342, "step": 179525 }, { "epoch": 1.77, "grad_norm": 6.203165054321289, "learning_rate": 2.0781724137931037e-06, "loss": 0.1506, "step": 179550 }, { "epoch": 1.77, "grad_norm": 5.695890426635742, "learning_rate": 2.0777413793103447e-06, "loss": 0.3206, "step": 179575 }, { "epoch": 1.77, "grad_norm": 7.635549068450928, "learning_rate": 2.077310344827586e-06, "loss": 0.1253, "step": 179600 }, { "epoch": 1.77, "grad_norm": 6.12493371963501, "learning_rate": 2.076879310344828e-06, "loss": 0.236, "step": 179625 }, { "epoch": 1.77, "grad_norm": 10.848455429077148, "learning_rate": 2.076448275862069e-06, "loss": 0.1721, "step": 179650 }, { "epoch": 1.77, "grad_norm": 4.1159467697143555, "learning_rate": 2.076034482758621e-06, "loss": 0.2698, "step": 179675 }, { "epoch": 1.77, "grad_norm": 3.52372670173645, "learning_rate": 2.075603448275862e-06, "loss": 0.1294, "step": 179700 }, { "epoch": 1.77, "grad_norm": 4.73252010345459, "learning_rate": 2.075172413793104e-06, "loss": 0.3508, "step": 179725 }, { "epoch": 1.77, "grad_norm": 7.235978126525879, "learning_rate": 2.074741379310345e-06, "loss": 0.1494, "step": 179750 }, { "epoch": 1.77, "grad_norm": 3.6854801177978516, "learning_rate": 2.0743103448275864e-06, "loss": 0.2431, "step": 179775 }, { "epoch": 1.77, "grad_norm": 9.396505355834961, "learning_rate": 2.073879310344828e-06, "loss": 0.1437, "step": 179800 }, { "epoch": 1.77, "grad_norm": 4.268734931945801, "learning_rate": 2.0734482758620693e-06, "loss": 0.2784, "step": 179825 }, { "epoch": 1.77, "grad_norm": 14.40063190460205, "learning_rate": 2.0730172413793107e-06, "loss": 0.1019, "step": 179850 }, { "epoch": 1.77, "grad_norm": 5.165010929107666, "learning_rate": 2.0725862068965518e-06, "loss": 0.2571, "step": 179875 }, { "epoch": 1.77, "grad_norm": 11.232693672180176, "learning_rate": 2.0721551724137932e-06, "loss": 0.1188, "step": 179900 }, { "epoch": 1.77, "grad_norm": 4.14907693862915, "learning_rate": 2.0717241379310347e-06, "loss": 0.2552, "step": 179925 }, { "epoch": 1.77, "grad_norm": 8.202415466308594, "learning_rate": 2.071293103448276e-06, "loss": 0.1423, "step": 179950 }, { "epoch": 1.77, "grad_norm": 4.812180995941162, "learning_rate": 2.070862068965517e-06, "loss": 0.32, "step": 179975 }, { "epoch": 1.77, "grad_norm": 16.178733825683594, "learning_rate": 2.0704310344827586e-06, "loss": 0.1533, "step": 180000 }, { "epoch": 1.77, "eval_loss": 0.552827775478363, "eval_runtime": 5774.7853, "eval_samples_per_second": 1.639, "eval_steps_per_second": 0.205, "eval_wer": 0.12335028648683448, "step": 180000 }, { "epoch": 1.77, "grad_norm": 4.401583194732666, "learning_rate": 2.07e-06, "loss": 0.2576, "step": 180025 }, { "epoch": 1.77, "grad_norm": 7.145916938781738, "learning_rate": 2.0695689655172415e-06, "loss": 0.1602, "step": 180050 }, { "epoch": 1.77, "grad_norm": 5.479348659515381, "learning_rate": 2.069137931034483e-06, "loss": 0.3018, "step": 180075 }, { "epoch": 1.77, "grad_norm": 3.6755030155181885, "learning_rate": 2.0687068965517245e-06, "loss": 0.124, "step": 180100 }, { "epoch": 1.77, "grad_norm": 3.9366753101348877, "learning_rate": 2.068275862068966e-06, "loss": 0.2415, "step": 180125 }, { "epoch": 1.77, "grad_norm": 9.064531326293945, "learning_rate": 2.067844827586207e-06, "loss": 0.1502, "step": 180150 }, { "epoch": 1.77, "grad_norm": 4.5304646492004395, "learning_rate": 2.0674137931034484e-06, "loss": 0.2687, "step": 180175 }, { "epoch": 1.77, "grad_norm": 8.427764892578125, "learning_rate": 2.06698275862069e-06, "loss": 0.1479, "step": 180200 }, { "epoch": 1.77, "grad_norm": 6.6723551750183105, "learning_rate": 2.0665517241379313e-06, "loss": 0.2567, "step": 180225 }, { "epoch": 1.77, "grad_norm": 6.747224807739258, "learning_rate": 2.0661206896551723e-06, "loss": 0.101, "step": 180250 }, { "epoch": 1.77, "grad_norm": 4.738363265991211, "learning_rate": 2.065689655172414e-06, "loss": 0.2402, "step": 180275 }, { "epoch": 1.77, "grad_norm": 9.473875999450684, "learning_rate": 2.0652586206896553e-06, "loss": 0.1553, "step": 180300 }, { "epoch": 1.77, "grad_norm": 5.116284370422363, "learning_rate": 2.0648275862068967e-06, "loss": 0.2945, "step": 180325 }, { "epoch": 1.77, "grad_norm": 22.84437370300293, "learning_rate": 2.064396551724138e-06, "loss": 0.1697, "step": 180350 }, { "epoch": 1.77, "grad_norm": 4.421890735626221, "learning_rate": 2.0639655172413796e-06, "loss": 0.2557, "step": 180375 }, { "epoch": 1.77, "grad_norm": 9.029961585998535, "learning_rate": 2.063534482758621e-06, "loss": 0.1268, "step": 180400 }, { "epoch": 1.77, "grad_norm": 5.719101428985596, "learning_rate": 2.063103448275862e-06, "loss": 0.2785, "step": 180425 }, { "epoch": 1.77, "grad_norm": 9.439170837402344, "learning_rate": 2.0626724137931036e-06, "loss": 0.1524, "step": 180450 }, { "epoch": 1.78, "grad_norm": 4.314789772033691, "learning_rate": 2.062241379310345e-06, "loss": 0.2818, "step": 180475 }, { "epoch": 1.78, "grad_norm": 8.096435546875, "learning_rate": 2.0618103448275865e-06, "loss": 0.163, "step": 180500 }, { "epoch": 1.78, "grad_norm": 6.816254138946533, "learning_rate": 2.0613793103448275e-06, "loss": 0.2437, "step": 180525 }, { "epoch": 1.78, "grad_norm": 8.701316833496094, "learning_rate": 2.060948275862069e-06, "loss": 0.1277, "step": 180550 }, { "epoch": 1.78, "grad_norm": 4.089632987976074, "learning_rate": 2.0605172413793104e-06, "loss": 0.2447, "step": 180575 }, { "epoch": 1.78, "grad_norm": 7.905245780944824, "learning_rate": 2.060086206896552e-06, "loss": 0.1341, "step": 180600 }, { "epoch": 1.78, "grad_norm": 5.535812854766846, "learning_rate": 2.0596551724137933e-06, "loss": 0.244, "step": 180625 }, { "epoch": 1.78, "grad_norm": 8.687996864318848, "learning_rate": 2.059224137931035e-06, "loss": 0.1598, "step": 180650 }, { "epoch": 1.78, "grad_norm": 4.702561855316162, "learning_rate": 2.0587931034482763e-06, "loss": 0.2674, "step": 180675 }, { "epoch": 1.78, "grad_norm": 13.597331047058105, "learning_rate": 2.0583620689655173e-06, "loss": 0.1263, "step": 180700 }, { "epoch": 1.78, "grad_norm": 5.265987396240234, "learning_rate": 2.0579310344827587e-06, "loss": 0.2498, "step": 180725 }, { "epoch": 1.78, "grad_norm": 17.451627731323242, "learning_rate": 2.0575e-06, "loss": 0.1261, "step": 180750 }, { "epoch": 1.78, "grad_norm": 3.476834297180176, "learning_rate": 2.0570689655172417e-06, "loss": 0.2586, "step": 180775 }, { "epoch": 1.78, "grad_norm": 13.525384902954102, "learning_rate": 2.056637931034483e-06, "loss": 0.1173, "step": 180800 }, { "epoch": 1.78, "grad_norm": 8.100985527038574, "learning_rate": 2.056206896551724e-06, "loss": 0.2709, "step": 180825 }, { "epoch": 1.78, "grad_norm": 10.519682884216309, "learning_rate": 2.0557758620689656e-06, "loss": 0.1665, "step": 180850 }, { "epoch": 1.78, "grad_norm": 5.591865539550781, "learning_rate": 2.055344827586207e-06, "loss": 0.266, "step": 180875 }, { "epoch": 1.78, "grad_norm": 0.5760241746902466, "learning_rate": 2.0549137931034485e-06, "loss": 0.1478, "step": 180900 }, { "epoch": 1.78, "grad_norm": 4.5439910888671875, "learning_rate": 2.0544827586206896e-06, "loss": 0.3011, "step": 180925 }, { "epoch": 1.78, "grad_norm": 21.303022384643555, "learning_rate": 2.0540517241379314e-06, "loss": 0.1501, "step": 180950 }, { "epoch": 1.78, "grad_norm": 5.157074928283691, "learning_rate": 2.0536206896551725e-06, "loss": 0.3018, "step": 180975 }, { "epoch": 1.78, "grad_norm": 8.628792762756348, "learning_rate": 2.053189655172414e-06, "loss": 0.1894, "step": 181000 }, { "epoch": 1.78, "grad_norm": 4.957251071929932, "learning_rate": 2.0527586206896554e-06, "loss": 0.2984, "step": 181025 }, { "epoch": 1.78, "grad_norm": 12.44725227355957, "learning_rate": 2.052327586206897e-06, "loss": 0.139, "step": 181050 }, { "epoch": 1.78, "grad_norm": 4.9581098556518555, "learning_rate": 2.0518965517241383e-06, "loss": 0.3542, "step": 181075 }, { "epoch": 1.78, "grad_norm": 7.239570140838623, "learning_rate": 2.0514655172413793e-06, "loss": 0.1601, "step": 181100 }, { "epoch": 1.78, "grad_norm": 3.115522623062134, "learning_rate": 2.0510344827586208e-06, "loss": 0.2911, "step": 181125 }, { "epoch": 1.78, "grad_norm": 17.876379013061523, "learning_rate": 2.0506034482758622e-06, "loss": 0.1235, "step": 181150 }, { "epoch": 1.78, "grad_norm": 3.245830535888672, "learning_rate": 2.0501724137931037e-06, "loss": 0.2884, "step": 181175 }, { "epoch": 1.78, "grad_norm": 6.721278190612793, "learning_rate": 2.0497413793103447e-06, "loss": 0.1313, "step": 181200 }, { "epoch": 1.78, "grad_norm": 5.948192119598389, "learning_rate": 2.049310344827586e-06, "loss": 0.3046, "step": 181225 }, { "epoch": 1.78, "grad_norm": 10.178791999816895, "learning_rate": 2.0488793103448276e-06, "loss": 0.1216, "step": 181250 }, { "epoch": 1.78, "grad_norm": 4.854991912841797, "learning_rate": 2.048448275862069e-06, "loss": 0.2796, "step": 181275 }, { "epoch": 1.78, "grad_norm": 12.457612037658691, "learning_rate": 2.0480172413793106e-06, "loss": 0.142, "step": 181300 }, { "epoch": 1.78, "grad_norm": 4.263694763183594, "learning_rate": 2.047586206896552e-06, "loss": 0.2777, "step": 181325 }, { "epoch": 1.78, "grad_norm": 7.235843181610107, "learning_rate": 2.0471551724137935e-06, "loss": 0.1546, "step": 181350 }, { "epoch": 1.78, "grad_norm": 3.9173262119293213, "learning_rate": 2.0467241379310345e-06, "loss": 0.269, "step": 181375 }, { "epoch": 1.78, "grad_norm": 1.6786773204803467, "learning_rate": 2.046293103448276e-06, "loss": 0.1336, "step": 181400 }, { "epoch": 1.78, "grad_norm": 6.076288223266602, "learning_rate": 2.0458620689655174e-06, "loss": 0.2835, "step": 181425 }, { "epoch": 1.78, "grad_norm": 6.341197967529297, "learning_rate": 2.045431034482759e-06, "loss": 0.1546, "step": 181450 }, { "epoch": 1.78, "grad_norm": 6.3479180335998535, "learning_rate": 2.045e-06, "loss": 0.3348, "step": 181475 }, { "epoch": 1.79, "grad_norm": 0.9452706575393677, "learning_rate": 2.0445689655172414e-06, "loss": 0.119, "step": 181500 }, { "epoch": 1.79, "grad_norm": 3.7522599697113037, "learning_rate": 2.0441379310344832e-06, "loss": 0.2269, "step": 181525 }, { "epoch": 1.79, "grad_norm": 6.6811957359313965, "learning_rate": 2.0437068965517243e-06, "loss": 0.1565, "step": 181550 }, { "epoch": 1.79, "grad_norm": 4.270876884460449, "learning_rate": 2.0432758620689657e-06, "loss": 0.2543, "step": 181575 }, { "epoch": 1.79, "grad_norm": 15.998218536376953, "learning_rate": 2.042844827586207e-06, "loss": 0.123, "step": 181600 }, { "epoch": 1.79, "grad_norm": 3.446228504180908, "learning_rate": 2.0424137931034486e-06, "loss": 0.2638, "step": 181625 }, { "epoch": 1.79, "grad_norm": 4.521145343780518, "learning_rate": 2.0419827586206897e-06, "loss": 0.1554, "step": 181650 }, { "epoch": 1.79, "grad_norm": 5.983158111572266, "learning_rate": 2.041551724137931e-06, "loss": 0.2564, "step": 181675 }, { "epoch": 1.79, "grad_norm": 7.638391017913818, "learning_rate": 2.0411206896551726e-06, "loss": 0.1293, "step": 181700 }, { "epoch": 1.79, "grad_norm": 3.948631763458252, "learning_rate": 2.040689655172414e-06, "loss": 0.2774, "step": 181725 }, { "epoch": 1.79, "grad_norm": 6.14481782913208, "learning_rate": 2.0402586206896555e-06, "loss": 0.1392, "step": 181750 }, { "epoch": 1.79, "grad_norm": 4.502658367156982, "learning_rate": 2.0398275862068965e-06, "loss": 0.2656, "step": 181775 }, { "epoch": 1.79, "grad_norm": 5.456671714782715, "learning_rate": 2.039396551724138e-06, "loss": 0.1602, "step": 181800 }, { "epoch": 1.79, "grad_norm": 3.495281934738159, "learning_rate": 2.0389655172413794e-06, "loss": 0.2408, "step": 181825 }, { "epoch": 1.79, "grad_norm": 6.417679309844971, "learning_rate": 2.038534482758621e-06, "loss": 0.1476, "step": 181850 }, { "epoch": 1.79, "grad_norm": 4.200814723968506, "learning_rate": 2.0381034482758624e-06, "loss": 0.2724, "step": 181875 }, { "epoch": 1.79, "grad_norm": 8.626921653747559, "learning_rate": 2.037672413793104e-06, "loss": 0.1086, "step": 181900 }, { "epoch": 1.79, "grad_norm": 4.0004119873046875, "learning_rate": 2.037241379310345e-06, "loss": 0.3064, "step": 181925 }, { "epoch": 1.79, "grad_norm": 5.245245456695557, "learning_rate": 2.0368103448275863e-06, "loss": 0.1342, "step": 181950 }, { "epoch": 1.79, "grad_norm": 5.530726909637451, "learning_rate": 2.0363793103448278e-06, "loss": 0.3055, "step": 181975 }, { "epoch": 1.79, "grad_norm": 9.38002872467041, "learning_rate": 2.0359482758620692e-06, "loss": 0.1399, "step": 182000 }, { "epoch": 1.79, "grad_norm": 4.2490010261535645, "learning_rate": 2.0355172413793107e-06, "loss": 0.2515, "step": 182025 }, { "epoch": 1.79, "grad_norm": 8.576883316040039, "learning_rate": 2.0350862068965517e-06, "loss": 0.1116, "step": 182050 }, { "epoch": 1.79, "grad_norm": 3.688910961151123, "learning_rate": 2.034655172413793e-06, "loss": 0.2831, "step": 182075 }, { "epoch": 1.79, "grad_norm": 8.693769454956055, "learning_rate": 2.0342241379310346e-06, "loss": 0.1478, "step": 182100 }, { "epoch": 1.79, "grad_norm": 4.721016883850098, "learning_rate": 2.033793103448276e-06, "loss": 0.3174, "step": 182125 }, { "epoch": 1.79, "grad_norm": 8.435751914978027, "learning_rate": 2.033362068965517e-06, "loss": 0.1557, "step": 182150 }, { "epoch": 1.79, "grad_norm": 5.213420391082764, "learning_rate": 2.032931034482759e-06, "loss": 0.2642, "step": 182175 }, { "epoch": 1.79, "grad_norm": 4.818663597106934, "learning_rate": 2.0325e-06, "loss": 0.1624, "step": 182200 }, { "epoch": 1.79, "grad_norm": 3.729290723800659, "learning_rate": 2.0320689655172415e-06, "loss": 0.2407, "step": 182225 }, { "epoch": 1.79, "grad_norm": 7.246818542480469, "learning_rate": 2.031637931034483e-06, "loss": 0.1239, "step": 182250 }, { "epoch": 1.79, "grad_norm": 4.327967166900635, "learning_rate": 2.031224137931035e-06, "loss": 0.273, "step": 182275 }, { "epoch": 1.79, "grad_norm": 7.910094738006592, "learning_rate": 2.0307931034482763e-06, "loss": 0.1464, "step": 182300 }, { "epoch": 1.79, "grad_norm": 5.38978910446167, "learning_rate": 2.0303620689655173e-06, "loss": 0.3093, "step": 182325 }, { "epoch": 1.79, "grad_norm": 7.916505336761475, "learning_rate": 2.0299310344827588e-06, "loss": 0.1311, "step": 182350 }, { "epoch": 1.79, "grad_norm": 5.307473659515381, "learning_rate": 2.0295000000000002e-06, "loss": 0.2539, "step": 182375 }, { "epoch": 1.79, "grad_norm": 9.37229061126709, "learning_rate": 2.0290689655172417e-06, "loss": 0.1326, "step": 182400 }, { "epoch": 1.79, "grad_norm": 6.522706985473633, "learning_rate": 2.0286379310344827e-06, "loss": 0.3228, "step": 182425 }, { "epoch": 1.79, "grad_norm": 11.226419448852539, "learning_rate": 2.028206896551724e-06, "loss": 0.1443, "step": 182450 }, { "epoch": 1.79, "grad_norm": 5.748000144958496, "learning_rate": 2.0277758620689656e-06, "loss": 0.252, "step": 182475 }, { "epoch": 1.79, "grad_norm": 6.967744827270508, "learning_rate": 2.027344827586207e-06, "loss": 0.1299, "step": 182500 }, { "epoch": 1.8, "grad_norm": 3.8133764266967773, "learning_rate": 2.0269137931034485e-06, "loss": 0.2808, "step": 182525 }, { "epoch": 1.8, "grad_norm": 7.8987884521484375, "learning_rate": 2.0264827586206896e-06, "loss": 0.1377, "step": 182550 }, { "epoch": 1.8, "grad_norm": 3.5617151260375977, "learning_rate": 2.0260517241379314e-06, "loss": 0.2486, "step": 182575 }, { "epoch": 1.8, "grad_norm": 11.26976490020752, "learning_rate": 2.0256206896551725e-06, "loss": 0.1895, "step": 182600 }, { "epoch": 1.8, "grad_norm": 4.490561485290527, "learning_rate": 2.025189655172414e-06, "loss": 0.2289, "step": 182625 }, { "epoch": 1.8, "grad_norm": 6.840845108032227, "learning_rate": 2.0247586206896554e-06, "loss": 0.1305, "step": 182650 }, { "epoch": 1.8, "grad_norm": 4.237077236175537, "learning_rate": 2.024327586206897e-06, "loss": 0.2346, "step": 182675 }, { "epoch": 1.8, "grad_norm": 8.798004150390625, "learning_rate": 2.023896551724138e-06, "loss": 0.1632, "step": 182700 }, { "epoch": 1.8, "grad_norm": 4.624111175537109, "learning_rate": 2.0234655172413793e-06, "loss": 0.2683, "step": 182725 }, { "epoch": 1.8, "grad_norm": 5.99928617477417, "learning_rate": 2.023034482758621e-06, "loss": 0.1059, "step": 182750 }, { "epoch": 1.8, "grad_norm": 3.2513606548309326, "learning_rate": 2.0226034482758623e-06, "loss": 0.2757, "step": 182775 }, { "epoch": 1.8, "grad_norm": 8.248435974121094, "learning_rate": 2.0221724137931037e-06, "loss": 0.1541, "step": 182800 }, { "epoch": 1.8, "grad_norm": 4.408049583435059, "learning_rate": 2.0217413793103447e-06, "loss": 0.2757, "step": 182825 }, { "epoch": 1.8, "grad_norm": 4.9890217781066895, "learning_rate": 2.0213103448275866e-06, "loss": 0.1476, "step": 182850 }, { "epoch": 1.8, "grad_norm": 5.644765853881836, "learning_rate": 2.0208793103448277e-06, "loss": 0.2529, "step": 182875 }, { "epoch": 1.8, "grad_norm": 4.667116165161133, "learning_rate": 2.020448275862069e-06, "loss": 0.1788, "step": 182900 }, { "epoch": 1.8, "grad_norm": 3.7748491764068604, "learning_rate": 2.0200172413793106e-06, "loss": 0.2517, "step": 182925 }, { "epoch": 1.8, "grad_norm": 6.036440849304199, "learning_rate": 2.019586206896552e-06, "loss": 0.1339, "step": 182950 }, { "epoch": 1.8, "grad_norm": 5.169650554656982, "learning_rate": 2.0191551724137935e-06, "loss": 0.3527, "step": 182975 }, { "epoch": 1.8, "grad_norm": 14.58109188079834, "learning_rate": 2.0187241379310345e-06, "loss": 0.1429, "step": 183000 }, { "epoch": 1.8, "grad_norm": 4.844177722930908, "learning_rate": 2.018293103448276e-06, "loss": 0.2464, "step": 183025 }, { "epoch": 1.8, "grad_norm": 9.102618217468262, "learning_rate": 2.0178620689655174e-06, "loss": 0.1507, "step": 183050 }, { "epoch": 1.8, "grad_norm": 7.619293689727783, "learning_rate": 2.017431034482759e-06, "loss": 0.2966, "step": 183075 }, { "epoch": 1.8, "grad_norm": 7.438868999481201, "learning_rate": 2.017e-06, "loss": 0.1575, "step": 183100 }, { "epoch": 1.8, "grad_norm": 5.527750015258789, "learning_rate": 2.0165689655172414e-06, "loss": 0.2595, "step": 183125 }, { "epoch": 1.8, "grad_norm": 28.851646423339844, "learning_rate": 2.016137931034483e-06, "loss": 0.1414, "step": 183150 }, { "epoch": 1.8, "grad_norm": 5.02254056930542, "learning_rate": 2.0157068965517243e-06, "loss": 0.2692, "step": 183175 }, { "epoch": 1.8, "grad_norm": 8.08593463897705, "learning_rate": 2.0152758620689657e-06, "loss": 0.178, "step": 183200 }, { "epoch": 1.8, "grad_norm": 5.670437335968018, "learning_rate": 2.014844827586207e-06, "loss": 0.2832, "step": 183225 }, { "epoch": 1.8, "grad_norm": 11.195930480957031, "learning_rate": 2.0144137931034487e-06, "loss": 0.1675, "step": 183250 }, { "epoch": 1.8, "grad_norm": 4.8223347663879395, "learning_rate": 2.0139827586206897e-06, "loss": 0.271, "step": 183275 }, { "epoch": 1.8, "grad_norm": 11.317625999450684, "learning_rate": 2.013551724137931e-06, "loss": 0.1497, "step": 183300 }, { "epoch": 1.8, "grad_norm": 4.763051509857178, "learning_rate": 2.0131206896551726e-06, "loss": 0.2834, "step": 183325 }, { "epoch": 1.8, "grad_norm": 6.102139949798584, "learning_rate": 2.012689655172414e-06, "loss": 0.1447, "step": 183350 }, { "epoch": 1.8, "grad_norm": 4.468783855438232, "learning_rate": 2.012258620689655e-06, "loss": 0.283, "step": 183375 }, { "epoch": 1.8, "grad_norm": 7.931102275848389, "learning_rate": 2.0118275862068965e-06, "loss": 0.1364, "step": 183400 }, { "epoch": 1.8, "grad_norm": 5.567911624908447, "learning_rate": 2.0113965517241384e-06, "loss": 0.2627, "step": 183425 }, { "epoch": 1.8, "grad_norm": 11.655583381652832, "learning_rate": 2.0109655172413795e-06, "loss": 0.1462, "step": 183450 }, { "epoch": 1.8, "grad_norm": 6.182898044586182, "learning_rate": 2.010534482758621e-06, "loss": 0.2901, "step": 183475 }, { "epoch": 1.8, "grad_norm": 11.521553993225098, "learning_rate": 2.0101034482758624e-06, "loss": 0.1449, "step": 183500 }, { "epoch": 1.81, "grad_norm": 5.390520095825195, "learning_rate": 2.009672413793104e-06, "loss": 0.3294, "step": 183525 }, { "epoch": 1.81, "grad_norm": 3.3118064403533936, "learning_rate": 2.009241379310345e-06, "loss": 0.1531, "step": 183550 }, { "epoch": 1.81, "grad_norm": 5.96399450302124, "learning_rate": 2.0088103448275863e-06, "loss": 0.2944, "step": 183575 }, { "epoch": 1.81, "grad_norm": 2.3771417140960693, "learning_rate": 2.0083793103448278e-06, "loss": 0.1478, "step": 183600 }, { "epoch": 1.81, "grad_norm": 5.380075931549072, "learning_rate": 2.0079482758620692e-06, "loss": 0.256, "step": 183625 }, { "epoch": 1.81, "grad_norm": 12.679900169372559, "learning_rate": 2.0075172413793107e-06, "loss": 0.1559, "step": 183650 }, { "epoch": 1.81, "grad_norm": 3.6436891555786133, "learning_rate": 2.0070862068965517e-06, "loss": 0.2951, "step": 183675 }, { "epoch": 1.81, "grad_norm": 8.39306640625, "learning_rate": 2.006655172413793e-06, "loss": 0.1501, "step": 183700 }, { "epoch": 1.81, "grad_norm": 5.037114143371582, "learning_rate": 2.0062241379310346e-06, "loss": 0.2866, "step": 183725 }, { "epoch": 1.81, "grad_norm": 12.340743064880371, "learning_rate": 2.005793103448276e-06, "loss": 0.1414, "step": 183750 }, { "epoch": 1.81, "grad_norm": 3.2550830841064453, "learning_rate": 2.005362068965517e-06, "loss": 0.256, "step": 183775 }, { "epoch": 1.81, "grad_norm": 11.208893775939941, "learning_rate": 2.004931034482759e-06, "loss": 0.1421, "step": 183800 }, { "epoch": 1.81, "grad_norm": 5.103369235992432, "learning_rate": 2.0045e-06, "loss": 0.2516, "step": 183825 }, { "epoch": 1.81, "grad_norm": 9.832314491271973, "learning_rate": 2.0040689655172415e-06, "loss": 0.1435, "step": 183850 }, { "epoch": 1.81, "grad_norm": 5.576724052429199, "learning_rate": 2.003637931034483e-06, "loss": 0.2726, "step": 183875 }, { "epoch": 1.81, "grad_norm": 10.540629386901855, "learning_rate": 2.0032068965517244e-06, "loss": 0.1325, "step": 183900 }, { "epoch": 1.81, "grad_norm": 4.22019624710083, "learning_rate": 2.002775862068966e-06, "loss": 0.2551, "step": 183925 }, { "epoch": 1.81, "grad_norm": 9.381143569946289, "learning_rate": 2.002344827586207e-06, "loss": 0.113, "step": 183950 }, { "epoch": 1.81, "grad_norm": 4.412118911743164, "learning_rate": 2.0019137931034484e-06, "loss": 0.2696, "step": 183975 }, { "epoch": 1.81, "grad_norm": 10.842039108276367, "learning_rate": 2.00148275862069e-06, "loss": 0.1311, "step": 184000 }, { "epoch": 1.81, "grad_norm": 4.212403774261475, "learning_rate": 2.0010517241379313e-06, "loss": 0.2736, "step": 184025 }, { "epoch": 1.81, "grad_norm": 6.535915851593018, "learning_rate": 2.0006206896551723e-06, "loss": 0.1171, "step": 184050 }, { "epoch": 1.81, "grad_norm": 9.306844711303711, "learning_rate": 2.000189655172414e-06, "loss": 0.3051, "step": 184075 }, { "epoch": 1.81, "grad_norm": 6.4897074699401855, "learning_rate": 1.999758620689655e-06, "loss": 0.136, "step": 184100 }, { "epoch": 1.81, "grad_norm": 10.673442840576172, "learning_rate": 1.9993275862068967e-06, "loss": 0.2472, "step": 184125 }, { "epoch": 1.81, "grad_norm": 9.35417366027832, "learning_rate": 1.998896551724138e-06, "loss": 0.1344, "step": 184150 }, { "epoch": 1.81, "grad_norm": 3.7491908073425293, "learning_rate": 1.9984655172413796e-06, "loss": 0.308, "step": 184175 }, { "epoch": 1.81, "grad_norm": 9.043850898742676, "learning_rate": 1.998034482758621e-06, "loss": 0.1445, "step": 184200 }, { "epoch": 1.81, "grad_norm": 4.467794418334961, "learning_rate": 1.997603448275862e-06, "loss": 0.2716, "step": 184225 }, { "epoch": 1.81, "grad_norm": 9.363259315490723, "learning_rate": 1.9971724137931035e-06, "loss": 0.1375, "step": 184250 }, { "epoch": 1.81, "grad_norm": 3.7592291831970215, "learning_rate": 1.996741379310345e-06, "loss": 0.2415, "step": 184275 }, { "epoch": 1.81, "grad_norm": 9.117269515991211, "learning_rate": 1.9963103448275864e-06, "loss": 0.1565, "step": 184300 }, { "epoch": 1.81, "grad_norm": 6.3469696044921875, "learning_rate": 1.9958793103448275e-06, "loss": 0.2341, "step": 184325 }, { "epoch": 1.81, "grad_norm": 11.458380699157715, "learning_rate": 1.995448275862069e-06, "loss": 0.1659, "step": 184350 }, { "epoch": 1.81, "grad_norm": 4.562179088592529, "learning_rate": 1.995017241379311e-06, "loss": 0.2747, "step": 184375 }, { "epoch": 1.81, "grad_norm": 7.7706780433654785, "learning_rate": 1.994586206896552e-06, "loss": 0.1125, "step": 184400 }, { "epoch": 1.81, "grad_norm": 4.99575662612915, "learning_rate": 1.9941724137931037e-06, "loss": 0.266, "step": 184425 }, { "epoch": 1.81, "grad_norm": 10.199362754821777, "learning_rate": 1.9937413793103448e-06, "loss": 0.133, "step": 184450 }, { "epoch": 1.81, "grad_norm": 5.35590124130249, "learning_rate": 1.9933103448275866e-06, "loss": 0.2589, "step": 184475 }, { "epoch": 1.81, "grad_norm": 14.027758598327637, "learning_rate": 1.9928793103448277e-06, "loss": 0.1456, "step": 184500 }, { "epoch": 1.81, "grad_norm": 5.423304557800293, "learning_rate": 1.992448275862069e-06, "loss": 0.2601, "step": 184525 }, { "epoch": 1.82, "grad_norm": 3.3518998622894287, "learning_rate": 1.9920172413793106e-06, "loss": 0.1619, "step": 184550 }, { "epoch": 1.82, "grad_norm": 5.300433158874512, "learning_rate": 1.991586206896552e-06, "loss": 0.275, "step": 184575 }, { "epoch": 1.82, "grad_norm": 12.864575386047363, "learning_rate": 1.991155172413793e-06, "loss": 0.1388, "step": 184600 }, { "epoch": 1.82, "grad_norm": 3.2262747287750244, "learning_rate": 1.9907241379310345e-06, "loss": 0.2271, "step": 184625 }, { "epoch": 1.82, "grad_norm": 5.62362813949585, "learning_rate": 1.990293103448276e-06, "loss": 0.1243, "step": 184650 }, { "epoch": 1.82, "grad_norm": 5.28811502456665, "learning_rate": 1.9898620689655174e-06, "loss": 0.2593, "step": 184675 }, { "epoch": 1.82, "grad_norm": 12.709386825561523, "learning_rate": 1.989431034482759e-06, "loss": 0.1695, "step": 184700 }, { "epoch": 1.82, "grad_norm": 5.586156845092773, "learning_rate": 1.989e-06, "loss": 0.3032, "step": 184725 }, { "epoch": 1.82, "grad_norm": 15.16342830657959, "learning_rate": 1.988568965517242e-06, "loss": 0.1159, "step": 184750 }, { "epoch": 1.82, "grad_norm": 3.8192503452301025, "learning_rate": 1.988137931034483e-06, "loss": 0.3071, "step": 184775 }, { "epoch": 1.82, "grad_norm": 8.011751174926758, "learning_rate": 1.9877068965517243e-06, "loss": 0.1499, "step": 184800 }, { "epoch": 1.82, "grad_norm": 3.283440113067627, "learning_rate": 1.9872758620689658e-06, "loss": 0.2724, "step": 184825 }, { "epoch": 1.82, "grad_norm": 9.91345500946045, "learning_rate": 1.9868448275862072e-06, "loss": 0.1563, "step": 184850 }, { "epoch": 1.82, "grad_norm": 4.42605447769165, "learning_rate": 1.9864137931034487e-06, "loss": 0.2623, "step": 184875 }, { "epoch": 1.82, "grad_norm": 5.750814914703369, "learning_rate": 1.9859827586206897e-06, "loss": 0.1175, "step": 184900 }, { "epoch": 1.82, "grad_norm": 3.624143362045288, "learning_rate": 1.985551724137931e-06, "loss": 0.3111, "step": 184925 }, { "epoch": 1.82, "grad_norm": 9.519949913024902, "learning_rate": 1.9851206896551726e-06, "loss": 0.1306, "step": 184950 }, { "epoch": 1.82, "grad_norm": 4.795050621032715, "learning_rate": 1.984689655172414e-06, "loss": 0.2793, "step": 184975 }, { "epoch": 1.82, "grad_norm": 9.72948169708252, "learning_rate": 1.984258620689655e-06, "loss": 0.1393, "step": 185000 }, { "epoch": 1.82, "grad_norm": 5.454403877258301, "learning_rate": 1.9838275862068966e-06, "loss": 0.2497, "step": 185025 }, { "epoch": 1.82, "grad_norm": 3.93153977394104, "learning_rate": 1.983396551724138e-06, "loss": 0.142, "step": 185050 }, { "epoch": 1.82, "grad_norm": 5.4076361656188965, "learning_rate": 1.9829655172413795e-06, "loss": 0.2658, "step": 185075 }, { "epoch": 1.82, "grad_norm": 9.163079261779785, "learning_rate": 1.982534482758621e-06, "loss": 0.1296, "step": 185100 }, { "epoch": 1.82, "grad_norm": 5.711435317993164, "learning_rate": 1.9821034482758624e-06, "loss": 0.2453, "step": 185125 }, { "epoch": 1.82, "grad_norm": 5.956074237823486, "learning_rate": 1.981672413793104e-06, "loss": 0.153, "step": 185150 }, { "epoch": 1.82, "grad_norm": 3.966886520385742, "learning_rate": 1.981241379310345e-06, "loss": 0.2638, "step": 185175 }, { "epoch": 1.82, "grad_norm": 10.738329887390137, "learning_rate": 1.9808103448275863e-06, "loss": 0.1521, "step": 185200 }, { "epoch": 1.82, "grad_norm": 5.103275775909424, "learning_rate": 1.980379310344828e-06, "loss": 0.3088, "step": 185225 }, { "epoch": 1.82, "grad_norm": 11.790279388427734, "learning_rate": 1.9799482758620692e-06, "loss": 0.1436, "step": 185250 }, { "epoch": 1.82, "grad_norm": 6.507052421569824, "learning_rate": 1.9795172413793103e-06, "loss": 0.2547, "step": 185275 }, { "epoch": 1.82, "grad_norm": 16.37847137451172, "learning_rate": 1.9790862068965517e-06, "loss": 0.1308, "step": 185300 }, { "epoch": 1.82, "grad_norm": 4.442214488983154, "learning_rate": 1.978655172413793e-06, "loss": 0.2495, "step": 185325 }, { "epoch": 1.82, "grad_norm": 7.094724178314209, "learning_rate": 1.9782241379310346e-06, "loss": 0.1356, "step": 185350 }, { "epoch": 1.82, "grad_norm": 4.453603744506836, "learning_rate": 1.977793103448276e-06, "loss": 0.2744, "step": 185375 }, { "epoch": 1.82, "grad_norm": 10.690865516662598, "learning_rate": 1.9773620689655176e-06, "loss": 0.1759, "step": 185400 }, { "epoch": 1.82, "grad_norm": 5.885151386260986, "learning_rate": 1.976931034482759e-06, "loss": 0.2755, "step": 185425 }, { "epoch": 1.82, "grad_norm": 4.160984992980957, "learning_rate": 1.9765e-06, "loss": 0.1359, "step": 185450 }, { "epoch": 1.82, "grad_norm": 3.3812849521636963, "learning_rate": 1.9760689655172415e-06, "loss": 0.3238, "step": 185475 }, { "epoch": 1.82, "grad_norm": 10.689332962036133, "learning_rate": 1.975637931034483e-06, "loss": 0.131, "step": 185500 }, { "epoch": 1.82, "grad_norm": 4.633937835693359, "learning_rate": 1.9752068965517244e-06, "loss": 0.2637, "step": 185525 }, { "epoch": 1.82, "grad_norm": 17.073394775390625, "learning_rate": 1.9747758620689655e-06, "loss": 0.1363, "step": 185550 }, { "epoch": 1.83, "grad_norm": 5.081617832183838, "learning_rate": 1.974344827586207e-06, "loss": 0.3146, "step": 185575 }, { "epoch": 1.83, "grad_norm": 5.051661014556885, "learning_rate": 1.9739137931034484e-06, "loss": 0.1342, "step": 185600 }, { "epoch": 1.83, "grad_norm": 5.335829734802246, "learning_rate": 1.97348275862069e-06, "loss": 0.2487, "step": 185625 }, { "epoch": 1.83, "grad_norm": 7.018686294555664, "learning_rate": 1.9730517241379313e-06, "loss": 0.1467, "step": 185650 }, { "epoch": 1.83, "grad_norm": 4.284624099731445, "learning_rate": 1.9726206896551723e-06, "loss": 0.2393, "step": 185675 }, { "epoch": 1.83, "grad_norm": 14.701166152954102, "learning_rate": 1.972189655172414e-06, "loss": 0.163, "step": 185700 }, { "epoch": 1.83, "grad_norm": 5.1182684898376465, "learning_rate": 1.9717586206896552e-06, "loss": 0.2752, "step": 185725 }, { "epoch": 1.83, "grad_norm": 13.125380516052246, "learning_rate": 1.9713275862068967e-06, "loss": 0.139, "step": 185750 }, { "epoch": 1.83, "grad_norm": 6.883011341094971, "learning_rate": 1.970896551724138e-06, "loss": 0.2742, "step": 185775 }, { "epoch": 1.83, "grad_norm": 13.4720458984375, "learning_rate": 1.9704655172413796e-06, "loss": 0.1555, "step": 185800 }, { "epoch": 1.83, "grad_norm": 5.592872619628906, "learning_rate": 1.970034482758621e-06, "loss": 0.2167, "step": 185825 }, { "epoch": 1.83, "grad_norm": 9.20092487335205, "learning_rate": 1.969603448275862e-06, "loss": 0.1397, "step": 185850 }, { "epoch": 1.83, "grad_norm": 4.868060111999512, "learning_rate": 1.9691724137931035e-06, "loss": 0.2127, "step": 185875 }, { "epoch": 1.83, "grad_norm": 9.430338859558105, "learning_rate": 1.968741379310345e-06, "loss": 0.1411, "step": 185900 }, { "epoch": 1.83, "grad_norm": 6.076472759246826, "learning_rate": 1.9683103448275865e-06, "loss": 0.2759, "step": 185925 }, { "epoch": 1.83, "grad_norm": 7.454299449920654, "learning_rate": 1.9678793103448275e-06, "loss": 0.1547, "step": 185950 }, { "epoch": 1.83, "grad_norm": 5.594222068786621, "learning_rate": 1.9674482758620694e-06, "loss": 0.2935, "step": 185975 }, { "epoch": 1.83, "grad_norm": 8.037284851074219, "learning_rate": 1.9670172413793104e-06, "loss": 0.1483, "step": 186000 }, { "epoch": 1.83, "grad_norm": 4.679470062255859, "learning_rate": 1.966586206896552e-06, "loss": 0.2938, "step": 186025 }, { "epoch": 1.83, "grad_norm": 9.410880088806152, "learning_rate": 1.9661551724137933e-06, "loss": 0.1722, "step": 186050 }, { "epoch": 1.83, "grad_norm": 4.019765853881836, "learning_rate": 1.9657241379310348e-06, "loss": 0.217, "step": 186075 }, { "epoch": 1.83, "grad_norm": 11.81381893157959, "learning_rate": 1.9652931034482762e-06, "loss": 0.158, "step": 186100 }, { "epoch": 1.83, "grad_norm": 3.921834707260132, "learning_rate": 1.9648620689655173e-06, "loss": 0.3368, "step": 186125 }, { "epoch": 1.83, "grad_norm": 11.251373291015625, "learning_rate": 1.9644310344827587e-06, "loss": 0.1445, "step": 186150 }, { "epoch": 1.83, "grad_norm": 3.8352181911468506, "learning_rate": 1.964e-06, "loss": 0.2753, "step": 186175 }, { "epoch": 1.83, "grad_norm": 5.570605754852295, "learning_rate": 1.9635689655172416e-06, "loss": 0.1166, "step": 186200 }, { "epoch": 1.83, "grad_norm": 6.332454681396484, "learning_rate": 1.9631379310344827e-06, "loss": 0.2289, "step": 186225 }, { "epoch": 1.83, "grad_norm": 5.862762928009033, "learning_rate": 1.962706896551724e-06, "loss": 0.1261, "step": 186250 }, { "epoch": 1.83, "grad_norm": 5.225114822387695, "learning_rate": 1.962275862068966e-06, "loss": 0.2587, "step": 186275 }, { "epoch": 1.83, "grad_norm": 7.6685967445373535, "learning_rate": 1.961844827586207e-06, "loss": 0.1331, "step": 186300 }, { "epoch": 1.83, "grad_norm": 3.7289650440216064, "learning_rate": 1.9614137931034485e-06, "loss": 0.2751, "step": 186325 }, { "epoch": 1.83, "grad_norm": 4.4400410652160645, "learning_rate": 1.96098275862069e-06, "loss": 0.1468, "step": 186350 }, { "epoch": 1.83, "grad_norm": 5.9345808029174805, "learning_rate": 1.9605517241379314e-06, "loss": 0.2652, "step": 186375 }, { "epoch": 1.83, "grad_norm": 4.973659992218018, "learning_rate": 1.9601206896551724e-06, "loss": 0.1231, "step": 186400 }, { "epoch": 1.83, "grad_norm": 4.4216742515563965, "learning_rate": 1.959689655172414e-06, "loss": 0.2704, "step": 186425 }, { "epoch": 1.83, "grad_norm": 8.29608154296875, "learning_rate": 1.9592586206896553e-06, "loss": 0.1446, "step": 186450 }, { "epoch": 1.83, "grad_norm": 3.8168017864227295, "learning_rate": 1.958827586206897e-06, "loss": 0.2777, "step": 186475 }, { "epoch": 1.83, "grad_norm": 9.46706771850586, "learning_rate": 1.9583965517241383e-06, "loss": 0.1662, "step": 186500 }, { "epoch": 1.83, "grad_norm": 5.085323810577393, "learning_rate": 1.9579827586206897e-06, "loss": 0.2962, "step": 186525 }, { "epoch": 1.83, "grad_norm": 15.399313926696777, "learning_rate": 1.957551724137931e-06, "loss": 0.129, "step": 186550 }, { "epoch": 1.84, "grad_norm": 3.087167739868164, "learning_rate": 1.9571206896551726e-06, "loss": 0.2888, "step": 186575 }, { "epoch": 1.84, "grad_norm": 8.361637115478516, "learning_rate": 1.956689655172414e-06, "loss": 0.1408, "step": 186600 }, { "epoch": 1.84, "grad_norm": 3.282095432281494, "learning_rate": 1.956258620689655e-06, "loss": 0.2744, "step": 186625 }, { "epoch": 1.84, "grad_norm": 7.892955303192139, "learning_rate": 1.9558275862068966e-06, "loss": 0.1394, "step": 186650 }, { "epoch": 1.84, "grad_norm": 4.091876983642578, "learning_rate": 1.955396551724138e-06, "loss": 0.2662, "step": 186675 }, { "epoch": 1.84, "grad_norm": 7.267695903778076, "learning_rate": 1.9549655172413795e-06, "loss": 0.126, "step": 186700 }, { "epoch": 1.84, "grad_norm": 4.21783447265625, "learning_rate": 1.954534482758621e-06, "loss": 0.2496, "step": 186725 }, { "epoch": 1.84, "grad_norm": 5.997796058654785, "learning_rate": 1.9541034482758624e-06, "loss": 0.1301, "step": 186750 }, { "epoch": 1.84, "grad_norm": 3.8526501655578613, "learning_rate": 1.953672413793104e-06, "loss": 0.2779, "step": 186775 }, { "epoch": 1.84, "grad_norm": 8.640957832336426, "learning_rate": 1.953241379310345e-06, "loss": 0.1585, "step": 186800 }, { "epoch": 1.84, "grad_norm": 7.148099899291992, "learning_rate": 1.9528103448275863e-06, "loss": 0.2671, "step": 186825 }, { "epoch": 1.84, "grad_norm": 7.981035232543945, "learning_rate": 1.952379310344828e-06, "loss": 0.1322, "step": 186850 }, { "epoch": 1.84, "grad_norm": 3.680171251296997, "learning_rate": 1.9519482758620693e-06, "loss": 0.2773, "step": 186875 }, { "epoch": 1.84, "grad_norm": 8.382801055908203, "learning_rate": 1.9515172413793103e-06, "loss": 0.141, "step": 186900 }, { "epoch": 1.84, "grad_norm": 5.007704734802246, "learning_rate": 1.9510862068965517e-06, "loss": 0.2871, "step": 186925 }, { "epoch": 1.84, "grad_norm": 12.387481689453125, "learning_rate": 1.950655172413793e-06, "loss": 0.1211, "step": 186950 }, { "epoch": 1.84, "grad_norm": 4.338253498077393, "learning_rate": 1.9502241379310347e-06, "loss": 0.2655, "step": 186975 }, { "epoch": 1.84, "grad_norm": 10.873542785644531, "learning_rate": 1.949793103448276e-06, "loss": 0.1613, "step": 187000 }, { "epoch": 1.84, "grad_norm": 2.5770814418792725, "learning_rate": 1.9493620689655176e-06, "loss": 0.2447, "step": 187025 }, { "epoch": 1.84, "grad_norm": 7.395229816436768, "learning_rate": 1.948931034482759e-06, "loss": 0.1346, "step": 187050 }, { "epoch": 1.84, "grad_norm": 5.879717826843262, "learning_rate": 1.9485e-06, "loss": 0.2558, "step": 187075 }, { "epoch": 1.84, "grad_norm": 9.80898380279541, "learning_rate": 1.9480689655172415e-06, "loss": 0.1372, "step": 187100 }, { "epoch": 1.84, "grad_norm": 5.137805461883545, "learning_rate": 1.947637931034483e-06, "loss": 0.272, "step": 187125 }, { "epoch": 1.84, "grad_norm": 12.496495246887207, "learning_rate": 1.9472068965517244e-06, "loss": 0.1441, "step": 187150 }, { "epoch": 1.84, "grad_norm": 4.145923137664795, "learning_rate": 1.9467758620689655e-06, "loss": 0.2538, "step": 187175 }, { "epoch": 1.84, "grad_norm": 9.55533504486084, "learning_rate": 1.946344827586207e-06, "loss": 0.1509, "step": 187200 }, { "epoch": 1.84, "grad_norm": 5.142787933349609, "learning_rate": 1.9459137931034484e-06, "loss": 0.2624, "step": 187225 }, { "epoch": 1.84, "grad_norm": 7.683206081390381, "learning_rate": 1.94548275862069e-06, "loss": 0.1632, "step": 187250 }, { "epoch": 1.84, "grad_norm": 4.573040962219238, "learning_rate": 1.9450517241379313e-06, "loss": 0.3072, "step": 187275 }, { "epoch": 1.84, "grad_norm": 10.60954475402832, "learning_rate": 1.9446206896551723e-06, "loss": 0.1579, "step": 187300 }, { "epoch": 1.84, "grad_norm": 3.607264995574951, "learning_rate": 1.944189655172414e-06, "loss": 0.3038, "step": 187325 }, { "epoch": 1.84, "grad_norm": 13.854574203491211, "learning_rate": 1.9437586206896552e-06, "loss": 0.1447, "step": 187350 }, { "epoch": 1.84, "grad_norm": 5.103724002838135, "learning_rate": 1.9433275862068967e-06, "loss": 0.3161, "step": 187375 }, { "epoch": 1.84, "grad_norm": 11.150789260864258, "learning_rate": 1.942896551724138e-06, "loss": 0.1373, "step": 187400 }, { "epoch": 1.84, "grad_norm": 4.00669002532959, "learning_rate": 1.9424655172413796e-06, "loss": 0.2629, "step": 187425 }, { "epoch": 1.84, "grad_norm": 9.934408187866211, "learning_rate": 1.9420344827586206e-06, "loss": 0.1521, "step": 187450 }, { "epoch": 1.84, "grad_norm": 4.983835220336914, "learning_rate": 1.941603448275862e-06, "loss": 0.3086, "step": 187475 }, { "epoch": 1.84, "grad_norm": 17.674463272094727, "learning_rate": 1.9411724137931036e-06, "loss": 0.1639, "step": 187500 }, { "epoch": 1.84, "grad_norm": 4.200780868530273, "learning_rate": 1.940741379310345e-06, "loss": 0.3164, "step": 187525 }, { "epoch": 1.84, "grad_norm": 5.609017372131348, "learning_rate": 1.9403103448275865e-06, "loss": 0.1597, "step": 187550 }, { "epoch": 1.84, "grad_norm": 1.8509408235549927, "learning_rate": 1.9398793103448275e-06, "loss": 0.264, "step": 187575 }, { "epoch": 1.85, "grad_norm": 8.584492683410645, "learning_rate": 1.9394482758620694e-06, "loss": 0.1773, "step": 187600 }, { "epoch": 1.85, "grad_norm": 4.471729278564453, "learning_rate": 1.9390172413793104e-06, "loss": 0.2841, "step": 187625 }, { "epoch": 1.85, "grad_norm": 3.0648553371429443, "learning_rate": 1.938586206896552e-06, "loss": 0.1553, "step": 187650 }, { "epoch": 1.85, "grad_norm": 4.156123638153076, "learning_rate": 1.9381551724137933e-06, "loss": 0.2578, "step": 187675 }, { "epoch": 1.85, "grad_norm": 7.047397136688232, "learning_rate": 1.9377241379310348e-06, "loss": 0.1418, "step": 187700 }, { "epoch": 1.85, "grad_norm": 4.23894739151001, "learning_rate": 1.9372931034482762e-06, "loss": 0.2491, "step": 187725 }, { "epoch": 1.85, "grad_norm": 7.826472282409668, "learning_rate": 1.9368620689655173e-06, "loss": 0.1149, "step": 187750 }, { "epoch": 1.85, "grad_norm": 5.397862911224365, "learning_rate": 1.9364310344827587e-06, "loss": 0.2356, "step": 187775 }, { "epoch": 1.85, "grad_norm": 5.3486647605896, "learning_rate": 1.936e-06, "loss": 0.1558, "step": 187800 }, { "epoch": 1.85, "grad_norm": 4.378810882568359, "learning_rate": 1.9355689655172416e-06, "loss": 0.2889, "step": 187825 }, { "epoch": 1.85, "grad_norm": 3.2391366958618164, "learning_rate": 1.9351379310344827e-06, "loss": 0.1659, "step": 187850 }, { "epoch": 1.85, "grad_norm": 3.9550187587738037, "learning_rate": 1.934706896551724e-06, "loss": 0.3084, "step": 187875 }, { "epoch": 1.85, "grad_norm": 5.908755779266357, "learning_rate": 1.9342758620689656e-06, "loss": 0.1374, "step": 187900 }, { "epoch": 1.85, "grad_norm": 3.280076503753662, "learning_rate": 1.933844827586207e-06, "loss": 0.3249, "step": 187925 }, { "epoch": 1.85, "grad_norm": 7.969749450683594, "learning_rate": 1.9334137931034485e-06, "loss": 0.1516, "step": 187950 }, { "epoch": 1.85, "grad_norm": 5.2720842361450195, "learning_rate": 1.93298275862069e-06, "loss": 0.2519, "step": 187975 }, { "epoch": 1.85, "grad_norm": 2.6356382369995117, "learning_rate": 1.9325517241379314e-06, "loss": 0.1229, "step": 188000 }, { "epoch": 1.85, "grad_norm": 4.125906467437744, "learning_rate": 1.9321206896551724e-06, "loss": 0.257, "step": 188025 }, { "epoch": 1.85, "grad_norm": 7.498401165008545, "learning_rate": 1.931689655172414e-06, "loss": 0.148, "step": 188050 }, { "epoch": 1.85, "grad_norm": 4.392154216766357, "learning_rate": 1.9312586206896554e-06, "loss": 0.2491, "step": 188075 }, { "epoch": 1.85, "grad_norm": 4.7650933265686035, "learning_rate": 1.930827586206897e-06, "loss": 0.1189, "step": 188100 }, { "epoch": 1.85, "grad_norm": 6.57865047454834, "learning_rate": 1.930396551724138e-06, "loss": 0.295, "step": 188125 }, { "epoch": 1.85, "grad_norm": 7.576416969299316, "learning_rate": 1.9299655172413793e-06, "loss": 0.1308, "step": 188150 }, { "epoch": 1.85, "grad_norm": 5.094016075134277, "learning_rate": 1.929534482758621e-06, "loss": 0.2567, "step": 188175 }, { "epoch": 1.85, "grad_norm": 4.656303882598877, "learning_rate": 1.9291034482758622e-06, "loss": 0.1567, "step": 188200 }, { "epoch": 1.85, "grad_norm": 4.255486011505127, "learning_rate": 1.9286724137931037e-06, "loss": 0.254, "step": 188225 }, { "epoch": 1.85, "grad_norm": 7.895015716552734, "learning_rate": 1.928241379310345e-06, "loss": 0.1155, "step": 188250 }, { "epoch": 1.85, "grad_norm": 5.590345859527588, "learning_rate": 1.9278103448275866e-06, "loss": 0.2802, "step": 188275 }, { "epoch": 1.85, "grad_norm": 5.915462493896484, "learning_rate": 1.9273793103448276e-06, "loss": 0.1544, "step": 188300 }, { "epoch": 1.85, "grad_norm": 5.144279479980469, "learning_rate": 1.926948275862069e-06, "loss": 0.3027, "step": 188325 }, { "epoch": 1.85, "grad_norm": 17.847095489501953, "learning_rate": 1.9265172413793105e-06, "loss": 0.1266, "step": 188350 }, { "epoch": 1.85, "grad_norm": 4.429621696472168, "learning_rate": 1.926086206896552e-06, "loss": 0.2449, "step": 188375 }, { "epoch": 1.85, "grad_norm": 12.260237693786621, "learning_rate": 1.925655172413793e-06, "loss": 0.1486, "step": 188400 }, { "epoch": 1.85, "grad_norm": 4.11419153213501, "learning_rate": 1.9252241379310345e-06, "loss": 0.2326, "step": 188425 }, { "epoch": 1.85, "grad_norm": 11.044589042663574, "learning_rate": 1.924793103448276e-06, "loss": 0.1083, "step": 188450 }, { "epoch": 1.85, "grad_norm": 3.80531907081604, "learning_rate": 1.9243620689655174e-06, "loss": 0.2597, "step": 188475 }, { "epoch": 1.85, "grad_norm": 14.35232925415039, "learning_rate": 1.923931034482759e-06, "loss": 0.1174, "step": 188500 }, { "epoch": 1.85, "grad_norm": 5.718972206115723, "learning_rate": 1.9235e-06, "loss": 0.2434, "step": 188525 }, { "epoch": 1.85, "grad_norm": 8.286408424377441, "learning_rate": 1.9230689655172418e-06, "loss": 0.1444, "step": 188550 }, { "epoch": 1.85, "grad_norm": 8.2926664352417, "learning_rate": 1.922637931034483e-06, "loss": 0.2737, "step": 188575 }, { "epoch": 1.85, "grad_norm": 1.0206856727600098, "learning_rate": 1.9222068965517242e-06, "loss": 0.1209, "step": 188600 }, { "epoch": 1.86, "grad_norm": 3.986632823944092, "learning_rate": 1.9217931034482757e-06, "loss": 0.252, "step": 188625 }, { "epoch": 1.86, "grad_norm": 7.14196252822876, "learning_rate": 1.9213620689655176e-06, "loss": 0.148, "step": 188650 }, { "epoch": 1.86, "grad_norm": 5.132498264312744, "learning_rate": 1.920931034482759e-06, "loss": 0.2612, "step": 188675 }, { "epoch": 1.86, "grad_norm": 11.370010375976562, "learning_rate": 1.9205e-06, "loss": 0.1708, "step": 188700 }, { "epoch": 1.86, "grad_norm": 5.876043796539307, "learning_rate": 1.9200689655172415e-06, "loss": 0.2416, "step": 188725 }, { "epoch": 1.86, "grad_norm": 9.914443969726562, "learning_rate": 1.919637931034483e-06, "loss": 0.1552, "step": 188750 }, { "epoch": 1.86, "grad_norm": 4.915356159210205, "learning_rate": 1.9192068965517244e-06, "loss": 0.2783, "step": 188775 }, { "epoch": 1.86, "grad_norm": 7.152259826660156, "learning_rate": 1.9187758620689655e-06, "loss": 0.154, "step": 188800 }, { "epoch": 1.86, "grad_norm": 5.475561618804932, "learning_rate": 1.918344827586207e-06, "loss": 0.2153, "step": 188825 }, { "epoch": 1.86, "grad_norm": 7.46007776260376, "learning_rate": 1.9179137931034484e-06, "loss": 0.1586, "step": 188850 }, { "epoch": 1.86, "grad_norm": 7.916579246520996, "learning_rate": 1.91748275862069e-06, "loss": 0.2794, "step": 188875 }, { "epoch": 1.86, "grad_norm": 7.723626136779785, "learning_rate": 1.917051724137931e-06, "loss": 0.1481, "step": 188900 }, { "epoch": 1.86, "grad_norm": 5.274489402770996, "learning_rate": 1.9166206896551728e-06, "loss": 0.2411, "step": 188925 }, { "epoch": 1.86, "grad_norm": 11.461287498474121, "learning_rate": 1.9161896551724142e-06, "loss": 0.1401, "step": 188950 }, { "epoch": 1.86, "grad_norm": 6.55194616317749, "learning_rate": 1.9157586206896553e-06, "loss": 0.2462, "step": 188975 }, { "epoch": 1.86, "grad_norm": 4.745373725891113, "learning_rate": 1.9153275862068967e-06, "loss": 0.141, "step": 189000 }, { "epoch": 1.86, "grad_norm": 3.8597960472106934, "learning_rate": 1.914896551724138e-06, "loss": 0.2687, "step": 189025 }, { "epoch": 1.86, "grad_norm": 3.6666324138641357, "learning_rate": 1.9144655172413796e-06, "loss": 0.1501, "step": 189050 }, { "epoch": 1.86, "grad_norm": 4.943268775939941, "learning_rate": 1.9140344827586207e-06, "loss": 0.2595, "step": 189075 }, { "epoch": 1.86, "grad_norm": 6.806570053100586, "learning_rate": 1.913603448275862e-06, "loss": 0.1289, "step": 189100 }, { "epoch": 1.86, "grad_norm": 3.907304048538208, "learning_rate": 1.9131724137931036e-06, "loss": 0.3113, "step": 189125 }, { "epoch": 1.86, "grad_norm": 10.634482383728027, "learning_rate": 1.912741379310345e-06, "loss": 0.1201, "step": 189150 }, { "epoch": 1.86, "grad_norm": 5.644582748413086, "learning_rate": 1.9123103448275865e-06, "loss": 0.2174, "step": 189175 }, { "epoch": 1.86, "grad_norm": 2.4046201705932617, "learning_rate": 1.9118793103448275e-06, "loss": 0.1632, "step": 189200 }, { "epoch": 1.86, "grad_norm": 4.37299108505249, "learning_rate": 1.9114482758620694e-06, "loss": 0.2866, "step": 189225 }, { "epoch": 1.86, "grad_norm": 7.6494245529174805, "learning_rate": 1.9110172413793104e-06, "loss": 0.1811, "step": 189250 }, { "epoch": 1.86, "grad_norm": 4.64735746383667, "learning_rate": 1.910586206896552e-06, "loss": 0.3098, "step": 189275 }, { "epoch": 1.86, "grad_norm": 9.856730461120605, "learning_rate": 1.9101551724137933e-06, "loss": 0.1445, "step": 189300 }, { "epoch": 1.86, "grad_norm": 7.513467311859131, "learning_rate": 1.909724137931035e-06, "loss": 0.3145, "step": 189325 }, { "epoch": 1.86, "grad_norm": 5.700111389160156, "learning_rate": 1.909293103448276e-06, "loss": 0.1851, "step": 189350 }, { "epoch": 1.86, "grad_norm": 4.425722599029541, "learning_rate": 1.9088620689655173e-06, "loss": 0.2519, "step": 189375 }, { "epoch": 1.86, "grad_norm": 10.962285041809082, "learning_rate": 1.9084310344827587e-06, "loss": 0.146, "step": 189400 }, { "epoch": 1.86, "grad_norm": 3.5387392044067383, "learning_rate": 1.908e-06, "loss": 0.2791, "step": 189425 }, { "epoch": 1.86, "grad_norm": 6.657726764678955, "learning_rate": 1.9075689655172417e-06, "loss": 0.1161, "step": 189450 }, { "epoch": 1.86, "grad_norm": 4.4015631675720215, "learning_rate": 1.907137931034483e-06, "loss": 0.3041, "step": 189475 }, { "epoch": 1.86, "grad_norm": 6.434539794921875, "learning_rate": 1.9067068965517244e-06, "loss": 0.1571, "step": 189500 }, { "epoch": 1.86, "grad_norm": 7.138477802276611, "learning_rate": 1.9062758620689656e-06, "loss": 0.2444, "step": 189525 }, { "epoch": 1.86, "grad_norm": 8.230813980102539, "learning_rate": 1.905844827586207e-06, "loss": 0.1447, "step": 189550 }, { "epoch": 1.86, "grad_norm": 3.802945852279663, "learning_rate": 1.9054137931034483e-06, "loss": 0.2752, "step": 189575 }, { "epoch": 1.86, "grad_norm": 20.942228317260742, "learning_rate": 1.9049827586206898e-06, "loss": 0.0987, "step": 189600 }, { "epoch": 1.87, "grad_norm": 4.987681865692139, "learning_rate": 1.9045517241379314e-06, "loss": 0.2591, "step": 189625 }, { "epoch": 1.87, "grad_norm": 9.111459732055664, "learning_rate": 1.9041206896551725e-06, "loss": 0.1363, "step": 189650 }, { "epoch": 1.87, "grad_norm": 4.022135257720947, "learning_rate": 1.9036896551724141e-06, "loss": 0.2479, "step": 189675 }, { "epoch": 1.87, "grad_norm": 8.369165420532227, "learning_rate": 1.9032586206896554e-06, "loss": 0.1239, "step": 189700 }, { "epoch": 1.87, "grad_norm": 4.089527130126953, "learning_rate": 1.9028275862068968e-06, "loss": 0.2457, "step": 189725 }, { "epoch": 1.87, "grad_norm": 9.221041679382324, "learning_rate": 1.902396551724138e-06, "loss": 0.174, "step": 189750 }, { "epoch": 1.87, "grad_norm": 4.966671943664551, "learning_rate": 1.9019655172413795e-06, "loss": 0.2517, "step": 189775 }, { "epoch": 1.87, "grad_norm": 5.286986827850342, "learning_rate": 1.9015344827586208e-06, "loss": 0.138, "step": 189800 }, { "epoch": 1.87, "grad_norm": 6.642541408538818, "learning_rate": 1.9011034482758622e-06, "loss": 0.2645, "step": 189825 }, { "epoch": 1.87, "grad_norm": 9.985130310058594, "learning_rate": 1.9006724137931035e-06, "loss": 0.1569, "step": 189850 }, { "epoch": 1.87, "grad_norm": 5.7828593254089355, "learning_rate": 1.900241379310345e-06, "loss": 0.2622, "step": 189875 }, { "epoch": 1.87, "grad_norm": 7.0460124015808105, "learning_rate": 1.8998103448275864e-06, "loss": 0.1783, "step": 189900 }, { "epoch": 1.87, "grad_norm": 4.321002006530762, "learning_rate": 1.8993793103448276e-06, "loss": 0.2675, "step": 189925 }, { "epoch": 1.87, "grad_norm": 7.934269905090332, "learning_rate": 1.8989482758620693e-06, "loss": 0.144, "step": 189950 }, { "epoch": 1.87, "grad_norm": 3.301473379135132, "learning_rate": 1.8985172413793103e-06, "loss": 0.3366, "step": 189975 }, { "epoch": 1.87, "grad_norm": 7.807431697845459, "learning_rate": 1.898086206896552e-06, "loss": 0.1533, "step": 190000 }, { "epoch": 1.87, "grad_norm": 5.832084655761719, "learning_rate": 1.8976551724137932e-06, "loss": 0.2695, "step": 190025 }, { "epoch": 1.87, "grad_norm": 8.844830513000488, "learning_rate": 1.8972241379310347e-06, "loss": 0.1592, "step": 190050 }, { "epoch": 1.87, "grad_norm": 4.750310897827148, "learning_rate": 1.896793103448276e-06, "loss": 0.2451, "step": 190075 }, { "epoch": 1.87, "grad_norm": 6.604858875274658, "learning_rate": 1.8963620689655174e-06, "loss": 0.1212, "step": 190100 }, { "epoch": 1.87, "grad_norm": 2.674437999725342, "learning_rate": 1.8959310344827589e-06, "loss": 0.2326, "step": 190125 }, { "epoch": 1.87, "grad_norm": 10.632627487182617, "learning_rate": 1.8955e-06, "loss": 0.139, "step": 190150 }, { "epoch": 1.87, "grad_norm": 4.590479373931885, "learning_rate": 1.8950689655172416e-06, "loss": 0.2714, "step": 190175 }, { "epoch": 1.87, "grad_norm": 20.102764129638672, "learning_rate": 1.8946379310344828e-06, "loss": 0.1195, "step": 190200 }, { "epoch": 1.87, "grad_norm": 5.153801918029785, "learning_rate": 1.8942068965517243e-06, "loss": 0.268, "step": 190225 }, { "epoch": 1.87, "grad_norm": 7.610867977142334, "learning_rate": 1.8937758620689655e-06, "loss": 0.1367, "step": 190250 }, { "epoch": 1.87, "grad_norm": 4.651105880737305, "learning_rate": 1.8933448275862072e-06, "loss": 0.2632, "step": 190275 }, { "epoch": 1.87, "grad_norm": 8.192728996276855, "learning_rate": 1.8929137931034482e-06, "loss": 0.1382, "step": 190300 }, { "epoch": 1.87, "grad_norm": 4.847105979919434, "learning_rate": 1.8924827586206899e-06, "loss": 0.2429, "step": 190325 }, { "epoch": 1.87, "grad_norm": 9.802624702453613, "learning_rate": 1.8920517241379313e-06, "loss": 0.1362, "step": 190350 }, { "epoch": 1.87, "grad_norm": 7.519474029541016, "learning_rate": 1.8916206896551726e-06, "loss": 0.2601, "step": 190375 }, { "epoch": 1.87, "grad_norm": 12.23134994506836, "learning_rate": 1.891189655172414e-06, "loss": 0.157, "step": 190400 }, { "epoch": 1.87, "grad_norm": 4.55640172958374, "learning_rate": 1.8907586206896553e-06, "loss": 0.2655, "step": 190425 }, { "epoch": 1.87, "grad_norm": 6.914873123168945, "learning_rate": 1.8903275862068967e-06, "loss": 0.1216, "step": 190450 }, { "epoch": 1.87, "grad_norm": 5.304615497589111, "learning_rate": 1.889896551724138e-06, "loss": 0.2383, "step": 190475 }, { "epoch": 1.87, "grad_norm": 5.970919609069824, "learning_rate": 1.8894655172413794e-06, "loss": 0.1278, "step": 190500 }, { "epoch": 1.87, "grad_norm": 3.3387584686279297, "learning_rate": 1.8890344827586207e-06, "loss": 0.2595, "step": 190525 }, { "epoch": 1.87, "grad_norm": 7.9099931716918945, "learning_rate": 1.8886034482758621e-06, "loss": 0.1193, "step": 190550 }, { "epoch": 1.87, "grad_norm": 4.851579189300537, "learning_rate": 1.8881724137931038e-06, "loss": 0.2886, "step": 190575 }, { "epoch": 1.87, "grad_norm": 11.054656028747559, "learning_rate": 1.887741379310345e-06, "loss": 0.1575, "step": 190600 }, { "epoch": 1.87, "grad_norm": 4.512772560119629, "learning_rate": 1.8873103448275865e-06, "loss": 0.2175, "step": 190625 }, { "epoch": 1.88, "grad_norm": 6.983494758605957, "learning_rate": 1.8868793103448278e-06, "loss": 0.1382, "step": 190650 }, { "epoch": 1.88, "grad_norm": 5.20913553237915, "learning_rate": 1.8864482758620692e-06, "loss": 0.2429, "step": 190675 }, { "epoch": 1.88, "grad_norm": 3.9822514057159424, "learning_rate": 1.8860172413793105e-06, "loss": 0.1486, "step": 190700 }, { "epoch": 1.88, "grad_norm": 3.3392701148986816, "learning_rate": 1.8856034482758623e-06, "loss": 0.2651, "step": 190725 }, { "epoch": 1.88, "grad_norm": 9.174363136291504, "learning_rate": 1.8851724137931036e-06, "loss": 0.1331, "step": 190750 }, { "epoch": 1.88, "grad_norm": 3.009908437728882, "learning_rate": 1.8847586206896555e-06, "loss": 0.2876, "step": 190775 }, { "epoch": 1.88, "grad_norm": 8.293118476867676, "learning_rate": 1.8843275862068965e-06, "loss": 0.1184, "step": 190800 }, { "epoch": 1.88, "grad_norm": 4.134772777557373, "learning_rate": 1.8838965517241382e-06, "loss": 0.2715, "step": 190825 }, { "epoch": 1.88, "grad_norm": 5.620519161224365, "learning_rate": 1.8834655172413794e-06, "loss": 0.116, "step": 190850 }, { "epoch": 1.88, "grad_norm": 4.326235771179199, "learning_rate": 1.8830344827586209e-06, "loss": 0.2457, "step": 190875 }, { "epoch": 1.88, "grad_norm": 7.798838138580322, "learning_rate": 1.8826034482758623e-06, "loss": 0.1447, "step": 190900 }, { "epoch": 1.88, "grad_norm": 4.353626251220703, "learning_rate": 1.8821724137931036e-06, "loss": 0.2771, "step": 190925 }, { "epoch": 1.88, "grad_norm": 7.882296085357666, "learning_rate": 1.881741379310345e-06, "loss": 0.1456, "step": 190950 }, { "epoch": 1.88, "grad_norm": 3.7450103759765625, "learning_rate": 1.8813103448275863e-06, "loss": 0.2654, "step": 190975 }, { "epoch": 1.88, "grad_norm": 5.863707542419434, "learning_rate": 1.8808793103448277e-06, "loss": 0.1397, "step": 191000 }, { "epoch": 1.88, "grad_norm": 3.907470226287842, "learning_rate": 1.880448275862069e-06, "loss": 0.2949, "step": 191025 }, { "epoch": 1.88, "grad_norm": 12.373068809509277, "learning_rate": 1.8800172413793104e-06, "loss": 0.1317, "step": 191050 }, { "epoch": 1.88, "grad_norm": 5.794064044952393, "learning_rate": 1.8795862068965517e-06, "loss": 0.2373, "step": 191075 }, { "epoch": 1.88, "grad_norm": 8.02979850769043, "learning_rate": 1.8791551724137933e-06, "loss": 0.1559, "step": 191100 }, { "epoch": 1.88, "grad_norm": 3.3045766353607178, "learning_rate": 1.8787241379310348e-06, "loss": 0.2451, "step": 191125 }, { "epoch": 1.88, "grad_norm": 14.628726959228516, "learning_rate": 1.878293103448276e-06, "loss": 0.1853, "step": 191150 }, { "epoch": 1.88, "grad_norm": 4.566246509552002, "learning_rate": 1.8778620689655175e-06, "loss": 0.2962, "step": 191175 }, { "epoch": 1.88, "grad_norm": 1.054259181022644, "learning_rate": 1.8774310344827587e-06, "loss": 0.1349, "step": 191200 }, { "epoch": 1.88, "grad_norm": 9.34519100189209, "learning_rate": 1.8770000000000002e-06, "loss": 0.2642, "step": 191225 }, { "epoch": 1.88, "grad_norm": 11.849300384521484, "learning_rate": 1.8765689655172414e-06, "loss": 0.1607, "step": 191250 }, { "epoch": 1.88, "grad_norm": 4.100924491882324, "learning_rate": 1.876137931034483e-06, "loss": 0.2369, "step": 191275 }, { "epoch": 1.88, "grad_norm": 11.945590019226074, "learning_rate": 1.8757068965517241e-06, "loss": 0.1577, "step": 191300 }, { "epoch": 1.88, "grad_norm": 3.448822498321533, "learning_rate": 1.8752758620689656e-06, "loss": 0.2356, "step": 191325 }, { "epoch": 1.88, "grad_norm": 10.70805549621582, "learning_rate": 1.8748448275862073e-06, "loss": 0.1467, "step": 191350 }, { "epoch": 1.88, "grad_norm": 5.546382427215576, "learning_rate": 1.8744137931034483e-06, "loss": 0.2793, "step": 191375 }, { "epoch": 1.88, "grad_norm": 8.117236137390137, "learning_rate": 1.87398275862069e-06, "loss": 0.1119, "step": 191400 }, { "epoch": 1.88, "grad_norm": 5.098230361938477, "learning_rate": 1.8735517241379312e-06, "loss": 0.2773, "step": 191425 }, { "epoch": 1.88, "grad_norm": 6.742635726928711, "learning_rate": 1.8731206896551727e-06, "loss": 0.1251, "step": 191450 }, { "epoch": 1.88, "grad_norm": 5.053881645202637, "learning_rate": 1.872689655172414e-06, "loss": 0.3055, "step": 191475 }, { "epoch": 1.88, "grad_norm": 4.9945902824401855, "learning_rate": 1.8722586206896554e-06, "loss": 0.1128, "step": 191500 }, { "epoch": 1.88, "grad_norm": 4.857369422912598, "learning_rate": 1.8718275862068966e-06, "loss": 0.2974, "step": 191525 }, { "epoch": 1.88, "grad_norm": 12.574186325073242, "learning_rate": 1.871396551724138e-06, "loss": 0.1877, "step": 191550 }, { "epoch": 1.88, "grad_norm": 4.118589878082275, "learning_rate": 1.8709655172413795e-06, "loss": 0.2837, "step": 191575 }, { "epoch": 1.88, "grad_norm": 4.110405921936035, "learning_rate": 1.8705344827586208e-06, "loss": 0.161, "step": 191600 }, { "epoch": 1.88, "grad_norm": 5.025146484375, "learning_rate": 1.8701034482758622e-06, "loss": 0.2394, "step": 191625 }, { "epoch": 1.88, "grad_norm": 6.3771586418151855, "learning_rate": 1.8696724137931035e-06, "loss": 0.1582, "step": 191650 }, { "epoch": 1.89, "grad_norm": 5.615410804748535, "learning_rate": 1.8692413793103451e-06, "loss": 0.2726, "step": 191675 }, { "epoch": 1.89, "grad_norm": 4.063571453094482, "learning_rate": 1.8688103448275862e-06, "loss": 0.1177, "step": 191700 }, { "epoch": 1.89, "grad_norm": 6.004093170166016, "learning_rate": 1.8683793103448278e-06, "loss": 0.2893, "step": 191725 }, { "epoch": 1.89, "grad_norm": 3.114818811416626, "learning_rate": 1.867948275862069e-06, "loss": 0.1469, "step": 191750 }, { "epoch": 1.89, "grad_norm": 4.747753620147705, "learning_rate": 1.8675172413793105e-06, "loss": 0.2298, "step": 191775 }, { "epoch": 1.89, "grad_norm": 6.1370415687561035, "learning_rate": 1.867086206896552e-06, "loss": 0.1623, "step": 191800 }, { "epoch": 1.89, "grad_norm": 6.745906352996826, "learning_rate": 1.8666551724137932e-06, "loss": 0.279, "step": 191825 }, { "epoch": 1.89, "grad_norm": 6.989050388336182, "learning_rate": 1.8662241379310347e-06, "loss": 0.1139, "step": 191850 }, { "epoch": 1.89, "grad_norm": 5.047282695770264, "learning_rate": 1.865793103448276e-06, "loss": 0.2458, "step": 191875 }, { "epoch": 1.89, "grad_norm": 1.3503241539001465, "learning_rate": 1.8653620689655174e-06, "loss": 0.1127, "step": 191900 }, { "epoch": 1.89, "grad_norm": 4.826085567474365, "learning_rate": 1.8649310344827586e-06, "loss": 0.3354, "step": 191925 }, { "epoch": 1.89, "grad_norm": 11.529921531677246, "learning_rate": 1.8645e-06, "loss": 0.1382, "step": 191950 }, { "epoch": 1.89, "grad_norm": 3.763731002807617, "learning_rate": 1.8640689655172414e-06, "loss": 0.2572, "step": 191975 }, { "epoch": 1.89, "grad_norm": 5.134704113006592, "learning_rate": 1.863637931034483e-06, "loss": 0.1237, "step": 192000 }, { "epoch": 1.89, "grad_norm": 5.068518161773682, "learning_rate": 1.863206896551724e-06, "loss": 0.224, "step": 192025 }, { "epoch": 1.89, "grad_norm": 7.529655456542969, "learning_rate": 1.8627758620689657e-06, "loss": 0.1159, "step": 192050 }, { "epoch": 1.89, "grad_norm": 4.803777694702148, "learning_rate": 1.8623448275862072e-06, "loss": 0.2816, "step": 192075 }, { "epoch": 1.89, "grad_norm": 8.149925231933594, "learning_rate": 1.8619137931034484e-06, "loss": 0.1286, "step": 192100 }, { "epoch": 1.89, "grad_norm": 6.018075466156006, "learning_rate": 1.8614827586206899e-06, "loss": 0.2523, "step": 192125 }, { "epoch": 1.89, "grad_norm": 17.530719757080078, "learning_rate": 1.8610517241379311e-06, "loss": 0.1569, "step": 192150 }, { "epoch": 1.89, "grad_norm": 4.339763641357422, "learning_rate": 1.8606206896551726e-06, "loss": 0.289, "step": 192175 }, { "epoch": 1.89, "grad_norm": 6.316220283508301, "learning_rate": 1.8601896551724138e-06, "loss": 0.1174, "step": 192200 }, { "epoch": 1.89, "grad_norm": 3.823381185531616, "learning_rate": 1.8597586206896553e-06, "loss": 0.2994, "step": 192225 }, { "epoch": 1.89, "grad_norm": 5.0428080558776855, "learning_rate": 1.8593275862068965e-06, "loss": 0.1272, "step": 192250 }, { "epoch": 1.89, "grad_norm": 6.952986240386963, "learning_rate": 1.858896551724138e-06, "loss": 0.2851, "step": 192275 }, { "epoch": 1.89, "grad_norm": 3.9490911960601807, "learning_rate": 1.8584655172413796e-06, "loss": 0.1329, "step": 192300 }, { "epoch": 1.89, "grad_norm": 4.194485664367676, "learning_rate": 1.8580344827586209e-06, "loss": 0.267, "step": 192325 }, { "epoch": 1.89, "grad_norm": 4.668405532836914, "learning_rate": 1.8576034482758624e-06, "loss": 0.1204, "step": 192350 }, { "epoch": 1.89, "grad_norm": 4.371639728546143, "learning_rate": 1.8571724137931036e-06, "loss": 0.2796, "step": 192375 }, { "epoch": 1.89, "grad_norm": 12.004880905151367, "learning_rate": 1.856741379310345e-06, "loss": 0.1676, "step": 192400 }, { "epoch": 1.89, "grad_norm": 4.485485076904297, "learning_rate": 1.8563103448275863e-06, "loss": 0.2755, "step": 192425 }, { "epoch": 1.89, "grad_norm": 9.124629974365234, "learning_rate": 1.8558793103448278e-06, "loss": 0.1255, "step": 192450 }, { "epoch": 1.89, "grad_norm": 4.954138278961182, "learning_rate": 1.855448275862069e-06, "loss": 0.3177, "step": 192475 }, { "epoch": 1.89, "grad_norm": 8.475653648376465, "learning_rate": 1.8550172413793105e-06, "loss": 0.1368, "step": 192500 }, { "epoch": 1.89, "grad_norm": 3.855558395385742, "learning_rate": 1.854586206896552e-06, "loss": 0.2607, "step": 192525 }, { "epoch": 1.89, "grad_norm": 9.303563117980957, "learning_rate": 1.8541551724137932e-06, "loss": 0.13, "step": 192550 }, { "epoch": 1.89, "grad_norm": 5.505710124969482, "learning_rate": 1.8537241379310348e-06, "loss": 0.2781, "step": 192575 }, { "epoch": 1.89, "grad_norm": 9.49133586883545, "learning_rate": 1.8532931034482759e-06, "loss": 0.1462, "step": 192600 }, { "epoch": 1.89, "grad_norm": 4.418076992034912, "learning_rate": 1.8528620689655175e-06, "loss": 0.2455, "step": 192625 }, { "epoch": 1.89, "grad_norm": 7.755890369415283, "learning_rate": 1.8524310344827588e-06, "loss": 0.1486, "step": 192650 }, { "epoch": 1.9, "grad_norm": 6.344155788421631, "learning_rate": 1.8520000000000002e-06, "loss": 0.2428, "step": 192675 }, { "epoch": 1.9, "grad_norm": 8.495250701904297, "learning_rate": 1.8515689655172415e-06, "loss": 0.1434, "step": 192700 }, { "epoch": 1.9, "grad_norm": 4.516077518463135, "learning_rate": 1.851137931034483e-06, "loss": 0.2682, "step": 192725 }, { "epoch": 1.9, "grad_norm": 3.2458713054656982, "learning_rate": 1.8507068965517244e-06, "loss": 0.1278, "step": 192750 }, { "epoch": 1.9, "grad_norm": 4.24716329574585, "learning_rate": 1.8502758620689656e-06, "loss": 0.2237, "step": 192775 }, { "epoch": 1.9, "grad_norm": 8.75756549835205, "learning_rate": 1.849844827586207e-06, "loss": 0.1292, "step": 192800 }, { "epoch": 1.9, "grad_norm": 4.575037002563477, "learning_rate": 1.8494137931034483e-06, "loss": 0.2882, "step": 192825 }, { "epoch": 1.9, "grad_norm": 14.204116821289062, "learning_rate": 1.8489827586206898e-06, "loss": 0.1252, "step": 192850 }, { "epoch": 1.9, "grad_norm": 10.542576789855957, "learning_rate": 1.848551724137931e-06, "loss": 0.263, "step": 192875 }, { "epoch": 1.9, "grad_norm": 4.111762523651123, "learning_rate": 1.8481206896551727e-06, "loss": 0.1316, "step": 192900 }, { "epoch": 1.9, "grad_norm": 4.330190658569336, "learning_rate": 1.8476896551724137e-06, "loss": 0.2499, "step": 192925 }, { "epoch": 1.9, "grad_norm": 11.361190795898438, "learning_rate": 1.8472586206896554e-06, "loss": 0.1727, "step": 192950 }, { "epoch": 1.9, "grad_norm": 3.8969433307647705, "learning_rate": 1.8468275862068969e-06, "loss": 0.3117, "step": 192975 }, { "epoch": 1.9, "grad_norm": 8.592256546020508, "learning_rate": 1.846396551724138e-06, "loss": 0.1435, "step": 193000 }, { "epoch": 1.9, "grad_norm": 4.70979642868042, "learning_rate": 1.8459655172413796e-06, "loss": 0.258, "step": 193025 }, { "epoch": 1.9, "grad_norm": 9.319150924682617, "learning_rate": 1.8455344827586208e-06, "loss": 0.1285, "step": 193050 }, { "epoch": 1.9, "grad_norm": 5.4269208908081055, "learning_rate": 1.8451034482758623e-06, "loss": 0.2422, "step": 193075 }, { "epoch": 1.9, "grad_norm": 9.576363563537598, "learning_rate": 1.8446724137931035e-06, "loss": 0.1262, "step": 193100 }, { "epoch": 1.9, "grad_norm": 6.035719871520996, "learning_rate": 1.844241379310345e-06, "loss": 0.2797, "step": 193125 }, { "epoch": 1.9, "grad_norm": 9.84602165222168, "learning_rate": 1.8438103448275862e-06, "loss": 0.1318, "step": 193150 }, { "epoch": 1.9, "grad_norm": 5.153491973876953, "learning_rate": 1.8433793103448277e-06, "loss": 0.321, "step": 193175 }, { "epoch": 1.9, "grad_norm": 6.862173080444336, "learning_rate": 1.842948275862069e-06, "loss": 0.1364, "step": 193200 }, { "epoch": 1.9, "grad_norm": 4.760772705078125, "learning_rate": 1.8425172413793106e-06, "loss": 0.3312, "step": 193225 }, { "epoch": 1.9, "grad_norm": 11.403990745544434, "learning_rate": 1.842086206896552e-06, "loss": 0.1367, "step": 193250 }, { "epoch": 1.9, "grad_norm": 5.20149040222168, "learning_rate": 1.8416551724137933e-06, "loss": 0.2751, "step": 193275 }, { "epoch": 1.9, "grad_norm": 7.059494972229004, "learning_rate": 1.8412241379310347e-06, "loss": 0.1646, "step": 193300 }, { "epoch": 1.9, "grad_norm": 3.6308107376098633, "learning_rate": 1.840793103448276e-06, "loss": 0.2666, "step": 193325 }, { "epoch": 1.9, "grad_norm": 7.597123622894287, "learning_rate": 1.8403620689655174e-06, "loss": 0.1951, "step": 193350 }, { "epoch": 1.9, "grad_norm": 5.2627153396606445, "learning_rate": 1.8399310344827587e-06, "loss": 0.2628, "step": 193375 }, { "epoch": 1.9, "grad_norm": 7.876093864440918, "learning_rate": 1.8395000000000001e-06, "loss": 0.1299, "step": 193400 }, { "epoch": 1.9, "grad_norm": 5.808713912963867, "learning_rate": 1.8390689655172414e-06, "loss": 0.2588, "step": 193425 }, { "epoch": 1.9, "grad_norm": 9.638476371765137, "learning_rate": 1.8386379310344828e-06, "loss": 0.1179, "step": 193450 }, { "epoch": 1.9, "grad_norm": 3.636791944503784, "learning_rate": 1.8382068965517245e-06, "loss": 0.2583, "step": 193475 }, { "epoch": 1.9, "grad_norm": 10.914167404174805, "learning_rate": 1.8377758620689655e-06, "loss": 0.1418, "step": 193500 }, { "epoch": 1.9, "grad_norm": 3.4266598224639893, "learning_rate": 1.8373448275862072e-06, "loss": 0.3299, "step": 193525 }, { "epoch": 1.9, "grad_norm": 7.172152996063232, "learning_rate": 1.8369137931034484e-06, "loss": 0.1316, "step": 193550 }, { "epoch": 1.9, "grad_norm": 3.959348440170288, "learning_rate": 1.83648275862069e-06, "loss": 0.269, "step": 193575 }, { "epoch": 1.9, "grad_norm": 10.065052032470703, "learning_rate": 1.8360517241379311e-06, "loss": 0.2017, "step": 193600 }, { "epoch": 1.9, "grad_norm": 4.202756404876709, "learning_rate": 1.8356206896551726e-06, "loss": 0.2717, "step": 193625 }, { "epoch": 1.9, "grad_norm": 3.0509917736053467, "learning_rate": 1.8351896551724139e-06, "loss": 0.1812, "step": 193650 }, { "epoch": 1.9, "grad_norm": 3.8929972648620605, "learning_rate": 1.8347586206896553e-06, "loss": 0.2631, "step": 193675 }, { "epoch": 1.91, "grad_norm": 9.383623123168945, "learning_rate": 1.8343275862068968e-06, "loss": 0.0841, "step": 193700 }, { "epoch": 1.91, "grad_norm": 4.018328666687012, "learning_rate": 1.833896551724138e-06, "loss": 0.271, "step": 193725 }, { "epoch": 1.91, "grad_norm": 11.896957397460938, "learning_rate": 1.8334655172413795e-06, "loss": 0.1293, "step": 193750 }, { "epoch": 1.91, "grad_norm": 3.5205090045928955, "learning_rate": 1.8330344827586207e-06, "loss": 0.2746, "step": 193775 }, { "epoch": 1.91, "grad_norm": 13.439962387084961, "learning_rate": 1.8326034482758624e-06, "loss": 0.156, "step": 193800 }, { "epoch": 1.91, "grad_norm": 12.516736030578613, "learning_rate": 1.8321724137931034e-06, "loss": 0.2434, "step": 193825 }, { "epoch": 1.91, "grad_norm": 4.672171592712402, "learning_rate": 1.831741379310345e-06, "loss": 0.1397, "step": 193850 }, { "epoch": 1.91, "grad_norm": 3.7502264976501465, "learning_rate": 1.8313103448275863e-06, "loss": 0.2845, "step": 193875 }, { "epoch": 1.91, "grad_norm": 9.29668140411377, "learning_rate": 1.8308793103448278e-06, "loss": 0.1287, "step": 193900 }, { "epoch": 1.91, "grad_norm": 5.030355930328369, "learning_rate": 1.8304482758620692e-06, "loss": 0.2769, "step": 193925 }, { "epoch": 1.91, "grad_norm": 0.9714958071708679, "learning_rate": 1.8300172413793105e-06, "loss": 0.185, "step": 193950 }, { "epoch": 1.91, "grad_norm": 6.112026691436768, "learning_rate": 1.829586206896552e-06, "loss": 0.2826, "step": 193975 }, { "epoch": 1.91, "grad_norm": 4.6860833168029785, "learning_rate": 1.8291551724137932e-06, "loss": 0.1263, "step": 194000 }, { "epoch": 1.91, "grad_norm": 2.969691038131714, "learning_rate": 1.8287241379310346e-06, "loss": 0.311, "step": 194025 }, { "epoch": 1.91, "grad_norm": 10.264383316040039, "learning_rate": 1.8282931034482759e-06, "loss": 0.141, "step": 194050 }, { "epoch": 1.91, "grad_norm": 4.532537937164307, "learning_rate": 1.8278620689655173e-06, "loss": 0.2771, "step": 194075 }, { "epoch": 1.91, "grad_norm": 13.717846870422363, "learning_rate": 1.8274310344827586e-06, "loss": 0.1017, "step": 194100 }, { "epoch": 1.91, "grad_norm": 4.568840503692627, "learning_rate": 1.8270000000000003e-06, "loss": 0.2458, "step": 194125 }, { "epoch": 1.91, "grad_norm": 9.582282066345215, "learning_rate": 1.8265689655172417e-06, "loss": 0.1328, "step": 194150 }, { "epoch": 1.91, "grad_norm": 5.294723987579346, "learning_rate": 1.826137931034483e-06, "loss": 0.3281, "step": 194175 }, { "epoch": 1.91, "grad_norm": 13.080141067504883, "learning_rate": 1.8257068965517244e-06, "loss": 0.1722, "step": 194200 }, { "epoch": 1.91, "grad_norm": 4.6878252029418945, "learning_rate": 1.8252758620689657e-06, "loss": 0.2752, "step": 194225 }, { "epoch": 1.91, "grad_norm": 7.557610988616943, "learning_rate": 1.8248448275862071e-06, "loss": 0.1472, "step": 194250 }, { "epoch": 1.91, "grad_norm": 6.357382774353027, "learning_rate": 1.8244137931034484e-06, "loss": 0.2833, "step": 194275 }, { "epoch": 1.91, "grad_norm": 12.038052558898926, "learning_rate": 1.8239827586206898e-06, "loss": 0.1077, "step": 194300 }, { "epoch": 1.91, "grad_norm": 3.93405818939209, "learning_rate": 1.823551724137931e-06, "loss": 0.2867, "step": 194325 }, { "epoch": 1.91, "grad_norm": 7.482386112213135, "learning_rate": 1.8231206896551725e-06, "loss": 0.1525, "step": 194350 }, { "epoch": 1.91, "grad_norm": 5.343409061431885, "learning_rate": 1.8226896551724138e-06, "loss": 0.3269, "step": 194375 }, { "epoch": 1.91, "grad_norm": 9.220532417297363, "learning_rate": 1.8222586206896552e-06, "loss": 0.1325, "step": 194400 }, { "epoch": 1.91, "grad_norm": 5.013116836547852, "learning_rate": 1.8218275862068969e-06, "loss": 0.2766, "step": 194425 }, { "epoch": 1.91, "grad_norm": 7.923734188079834, "learning_rate": 1.8213965517241381e-06, "loss": 0.1136, "step": 194450 }, { "epoch": 1.91, "grad_norm": 3.794426202774048, "learning_rate": 1.8209655172413796e-06, "loss": 0.2706, "step": 194475 }, { "epoch": 1.91, "grad_norm": 9.23122787475586, "learning_rate": 1.8205344827586208e-06, "loss": 0.1363, "step": 194500 }, { "epoch": 1.91, "grad_norm": 4.099663257598877, "learning_rate": 1.8201034482758623e-06, "loss": 0.2798, "step": 194525 }, { "epoch": 1.91, "grad_norm": 9.283904075622559, "learning_rate": 1.8196724137931035e-06, "loss": 0.1442, "step": 194550 }, { "epoch": 1.91, "grad_norm": 4.012804985046387, "learning_rate": 1.819241379310345e-06, "loss": 0.2876, "step": 194575 }, { "epoch": 1.91, "grad_norm": 8.803170204162598, "learning_rate": 1.8188103448275862e-06, "loss": 0.144, "step": 194600 }, { "epoch": 1.91, "grad_norm": 4.47322940826416, "learning_rate": 1.8183793103448277e-06, "loss": 0.2501, "step": 194625 }, { "epoch": 1.91, "grad_norm": 12.918756484985352, "learning_rate": 1.8179482758620691e-06, "loss": 0.1221, "step": 194650 }, { "epoch": 1.91, "grad_norm": 5.833967208862305, "learning_rate": 1.8175172413793104e-06, "loss": 0.2766, "step": 194675 }, { "epoch": 1.91, "grad_norm": 6.436094284057617, "learning_rate": 1.817086206896552e-06, "loss": 0.152, "step": 194700 }, { "epoch": 1.92, "grad_norm": 5.226970672607422, "learning_rate": 1.816655172413793e-06, "loss": 0.2675, "step": 194725 }, { "epoch": 1.92, "grad_norm": 6.011135101318359, "learning_rate": 1.8162241379310348e-06, "loss": 0.1309, "step": 194750 }, { "epoch": 1.92, "grad_norm": 3.771667003631592, "learning_rate": 1.815793103448276e-06, "loss": 0.206, "step": 194775 }, { "epoch": 1.92, "grad_norm": 7.457636833190918, "learning_rate": 1.8153620689655175e-06, "loss": 0.1289, "step": 194800 }, { "epoch": 1.92, "grad_norm": 3.028024196624756, "learning_rate": 1.8149310344827587e-06, "loss": 0.232, "step": 194825 }, { "epoch": 1.92, "grad_norm": 9.026468276977539, "learning_rate": 1.8145000000000002e-06, "loss": 0.1527, "step": 194850 }, { "epoch": 1.92, "grad_norm": 3.977402687072754, "learning_rate": 1.8140689655172416e-06, "loss": 0.2898, "step": 194875 }, { "epoch": 1.92, "grad_norm": 10.969671249389648, "learning_rate": 1.8136379310344829e-06, "loss": 0.164, "step": 194900 }, { "epoch": 1.92, "grad_norm": 5.788134574890137, "learning_rate": 1.8132068965517243e-06, "loss": 0.2439, "step": 194925 }, { "epoch": 1.92, "grad_norm": 4.082048416137695, "learning_rate": 1.8127758620689656e-06, "loss": 0.094, "step": 194950 }, { "epoch": 1.92, "grad_norm": 10.552329063415527, "learning_rate": 1.812344827586207e-06, "loss": 0.2376, "step": 194975 }, { "epoch": 1.92, "grad_norm": 10.553384780883789, "learning_rate": 1.8119137931034483e-06, "loss": 0.1137, "step": 195000 }, { "epoch": 1.92, "grad_norm": 4.183693885803223, "learning_rate": 1.81148275862069e-06, "loss": 0.2364, "step": 195025 }, { "epoch": 1.92, "grad_norm": 4.319622993469238, "learning_rate": 1.811051724137931e-06, "loss": 0.1456, "step": 195050 }, { "epoch": 1.92, "grad_norm": 5.990379810333252, "learning_rate": 1.8106206896551726e-06, "loss": 0.209, "step": 195075 }, { "epoch": 1.92, "grad_norm": 13.770355224609375, "learning_rate": 1.810189655172414e-06, "loss": 0.1394, "step": 195100 }, { "epoch": 1.92, "grad_norm": 4.5027360916137695, "learning_rate": 1.8097586206896553e-06, "loss": 0.2789, "step": 195125 }, { "epoch": 1.92, "grad_norm": 7.351436614990234, "learning_rate": 1.8093275862068968e-06, "loss": 0.1245, "step": 195150 }, { "epoch": 1.92, "grad_norm": 3.6617660522460938, "learning_rate": 1.808896551724138e-06, "loss": 0.2546, "step": 195175 }, { "epoch": 1.92, "grad_norm": 8.473050117492676, "learning_rate": 1.8084655172413795e-06, "loss": 0.1581, "step": 195200 }, { "epoch": 1.92, "grad_norm": 6.114410400390625, "learning_rate": 1.8080344827586207e-06, "loss": 0.3168, "step": 195225 }, { "epoch": 1.92, "grad_norm": 9.844667434692383, "learning_rate": 1.8076034482758622e-06, "loss": 0.1701, "step": 195250 }, { "epoch": 1.92, "grad_norm": 3.3029370307922363, "learning_rate": 1.8071724137931034e-06, "loss": 0.2691, "step": 195275 }, { "epoch": 1.92, "grad_norm": 9.623431205749512, "learning_rate": 1.806741379310345e-06, "loss": 0.1506, "step": 195300 }, { "epoch": 1.92, "grad_norm": 3.4048163890838623, "learning_rate": 1.8063103448275866e-06, "loss": 0.2749, "step": 195325 }, { "epoch": 1.92, "grad_norm": 8.243891716003418, "learning_rate": 1.8058793103448278e-06, "loss": 0.1443, "step": 195350 }, { "epoch": 1.92, "grad_norm": 5.976192474365234, "learning_rate": 1.8054482758620693e-06, "loss": 0.2386, "step": 195375 }, { "epoch": 1.92, "grad_norm": 8.961409568786621, "learning_rate": 1.8050172413793105e-06, "loss": 0.1517, "step": 195400 }, { "epoch": 1.92, "grad_norm": 3.9837119579315186, "learning_rate": 1.804586206896552e-06, "loss": 0.3278, "step": 195425 }, { "epoch": 1.92, "grad_norm": 6.996262550354004, "learning_rate": 1.8041551724137932e-06, "loss": 0.1363, "step": 195450 }, { "epoch": 1.92, "grad_norm": 5.544023513793945, "learning_rate": 1.8037241379310347e-06, "loss": 0.2268, "step": 195475 }, { "epoch": 1.92, "grad_norm": 6.733432292938232, "learning_rate": 1.803293103448276e-06, "loss": 0.1211, "step": 195500 }, { "epoch": 1.92, "grad_norm": 4.595959663391113, "learning_rate": 1.8028620689655174e-06, "loss": 0.2948, "step": 195525 }, { "epoch": 1.92, "grad_norm": 5.258302211761475, "learning_rate": 1.8024310344827586e-06, "loss": 0.1631, "step": 195550 }, { "epoch": 1.92, "grad_norm": 3.7643043994903564, "learning_rate": 1.802e-06, "loss": 0.2593, "step": 195575 }, { "epoch": 1.92, "grad_norm": 7.059357643127441, "learning_rate": 1.8015689655172417e-06, "loss": 0.1447, "step": 195600 }, { "epoch": 1.92, "grad_norm": 3.9807188510894775, "learning_rate": 1.8011379310344828e-06, "loss": 0.2865, "step": 195625 }, { "epoch": 1.92, "grad_norm": 7.254642009735107, "learning_rate": 1.8007068965517244e-06, "loss": 0.116, "step": 195650 }, { "epoch": 1.92, "grad_norm": 3.949000120162964, "learning_rate": 1.800293103448276e-06, "loss": 0.243, "step": 195675 }, { "epoch": 1.92, "grad_norm": 12.829222679138184, "learning_rate": 1.7998620689655176e-06, "loss": 0.1469, "step": 195700 }, { "epoch": 1.93, "grad_norm": 4.467544078826904, "learning_rate": 1.7994310344827586e-06, "loss": 0.3029, "step": 195725 }, { "epoch": 1.93, "grad_norm": 7.609059810638428, "learning_rate": 1.7990000000000003e-06, "loss": 0.1329, "step": 195750 }, { "epoch": 1.93, "grad_norm": 4.865446090698242, "learning_rate": 1.7985689655172415e-06, "loss": 0.2754, "step": 195775 }, { "epoch": 1.93, "grad_norm": 9.494041442871094, "learning_rate": 1.798137931034483e-06, "loss": 0.1301, "step": 195800 }, { "epoch": 1.93, "grad_norm": 5.291029930114746, "learning_rate": 1.7977068965517244e-06, "loss": 0.2502, "step": 195825 }, { "epoch": 1.93, "grad_norm": 11.671672821044922, "learning_rate": 1.7972758620689657e-06, "loss": 0.1553, "step": 195850 }, { "epoch": 1.93, "grad_norm": 4.379571437835693, "learning_rate": 1.7968448275862071e-06, "loss": 0.2311, "step": 195875 }, { "epoch": 1.93, "grad_norm": 7.990503787994385, "learning_rate": 1.7964137931034484e-06, "loss": 0.1258, "step": 195900 }, { "epoch": 1.93, "grad_norm": 4.060763835906982, "learning_rate": 1.7959827586206898e-06, "loss": 0.2416, "step": 195925 }, { "epoch": 1.93, "grad_norm": 7.1005754470825195, "learning_rate": 1.795551724137931e-06, "loss": 0.1433, "step": 195950 }, { "epoch": 1.93, "grad_norm": 5.495706081390381, "learning_rate": 1.7951206896551725e-06, "loss": 0.2931, "step": 195975 }, { "epoch": 1.93, "grad_norm": 12.886534690856934, "learning_rate": 1.7946896551724138e-06, "loss": 0.1273, "step": 196000 }, { "epoch": 1.93, "grad_norm": 4.120438098907471, "learning_rate": 1.7942586206896554e-06, "loss": 0.2439, "step": 196025 }, { "epoch": 1.93, "grad_norm": 13.834447860717773, "learning_rate": 1.7938275862068965e-06, "loss": 0.145, "step": 196050 }, { "epoch": 1.93, "grad_norm": 3.187443256378174, "learning_rate": 1.7933965517241381e-06, "loss": 0.2589, "step": 196075 }, { "epoch": 1.93, "grad_norm": 2.178675413131714, "learning_rate": 1.7929655172413796e-06, "loss": 0.1227, "step": 196100 }, { "epoch": 1.93, "grad_norm": 3.8908350467681885, "learning_rate": 1.7925344827586208e-06, "loss": 0.2515, "step": 196125 }, { "epoch": 1.93, "grad_norm": 10.853026390075684, "learning_rate": 1.7921034482758623e-06, "loss": 0.1486, "step": 196150 }, { "epoch": 1.93, "grad_norm": 5.553339004516602, "learning_rate": 1.7916724137931035e-06, "loss": 0.2993, "step": 196175 }, { "epoch": 1.93, "grad_norm": 7.817753314971924, "learning_rate": 1.791241379310345e-06, "loss": 0.1404, "step": 196200 }, { "epoch": 1.93, "grad_norm": 8.523514747619629, "learning_rate": 1.7908103448275862e-06, "loss": 0.2911, "step": 196225 }, { "epoch": 1.93, "grad_norm": 6.584021091461182, "learning_rate": 1.7903793103448277e-06, "loss": 0.1195, "step": 196250 }, { "epoch": 1.93, "grad_norm": 5.519806861877441, "learning_rate": 1.789948275862069e-06, "loss": 0.2567, "step": 196275 }, { "epoch": 1.93, "grad_norm": 9.291537284851074, "learning_rate": 1.7895172413793104e-06, "loss": 0.1385, "step": 196300 }, { "epoch": 1.93, "grad_norm": 5.3060383796691895, "learning_rate": 1.789086206896552e-06, "loss": 0.2704, "step": 196325 }, { "epoch": 1.93, "grad_norm": 7.446920394897461, "learning_rate": 1.7886551724137933e-06, "loss": 0.1133, "step": 196350 }, { "epoch": 1.93, "grad_norm": 4.008365154266357, "learning_rate": 1.7882241379310348e-06, "loss": 0.2811, "step": 196375 }, { "epoch": 1.93, "grad_norm": 3.873692512512207, "learning_rate": 1.787793103448276e-06, "loss": 0.1367, "step": 196400 }, { "epoch": 1.93, "grad_norm": 4.5896525382995605, "learning_rate": 1.7873620689655175e-06, "loss": 0.2938, "step": 196425 }, { "epoch": 1.93, "grad_norm": 12.276272773742676, "learning_rate": 1.7869310344827587e-06, "loss": 0.1376, "step": 196450 }, { "epoch": 1.93, "grad_norm": 3.5590415000915527, "learning_rate": 1.7865000000000002e-06, "loss": 0.2652, "step": 196475 }, { "epoch": 1.93, "grad_norm": 5.010685920715332, "learning_rate": 1.7860689655172414e-06, "loss": 0.1501, "step": 196500 }, { "epoch": 1.93, "grad_norm": 5.092726707458496, "learning_rate": 1.7856379310344829e-06, "loss": 0.2971, "step": 196525 }, { "epoch": 1.93, "grad_norm": 9.45881462097168, "learning_rate": 1.7852068965517243e-06, "loss": 0.1443, "step": 196550 }, { "epoch": 1.93, "grad_norm": 4.955732822418213, "learning_rate": 1.7847758620689656e-06, "loss": 0.2703, "step": 196575 }, { "epoch": 1.93, "grad_norm": 10.228059768676758, "learning_rate": 1.7843448275862072e-06, "loss": 0.1729, "step": 196600 }, { "epoch": 1.93, "grad_norm": 3.202422857284546, "learning_rate": 1.7839137931034483e-06, "loss": 0.2808, "step": 196625 }, { "epoch": 1.93, "grad_norm": 11.510518074035645, "learning_rate": 1.78348275862069e-06, "loss": 0.1732, "step": 196650 }, { "epoch": 1.93, "grad_norm": 3.161634683609009, "learning_rate": 1.7830517241379312e-06, "loss": 0.2758, "step": 196675 }, { "epoch": 1.93, "grad_norm": 2.8185460567474365, "learning_rate": 1.7826206896551726e-06, "loss": 0.1393, "step": 196700 }, { "epoch": 1.93, "grad_norm": 4.790342807769775, "learning_rate": 1.7821896551724139e-06, "loss": 0.2967, "step": 196725 }, { "epoch": 1.94, "grad_norm": 6.628152847290039, "learning_rate": 1.7817586206896553e-06, "loss": 0.1458, "step": 196750 }, { "epoch": 1.94, "grad_norm": 4.528590679168701, "learning_rate": 1.7813275862068968e-06, "loss": 0.2641, "step": 196775 }, { "epoch": 1.94, "grad_norm": 7.875167369842529, "learning_rate": 1.780896551724138e-06, "loss": 0.1376, "step": 196800 }, { "epoch": 1.94, "grad_norm": 4.368308067321777, "learning_rate": 1.7804655172413795e-06, "loss": 0.2848, "step": 196825 }, { "epoch": 1.94, "grad_norm": 6.5006232261657715, "learning_rate": 1.7800344827586208e-06, "loss": 0.1389, "step": 196850 }, { "epoch": 1.94, "grad_norm": 3.9868035316467285, "learning_rate": 1.7796034482758622e-06, "loss": 0.2807, "step": 196875 }, { "epoch": 1.94, "grad_norm": 3.5354204177856445, "learning_rate": 1.7791724137931035e-06, "loss": 0.1229, "step": 196900 }, { "epoch": 1.94, "grad_norm": 4.41793966293335, "learning_rate": 1.7787413793103451e-06, "loss": 0.2368, "step": 196925 }, { "epoch": 1.94, "grad_norm": 9.536059379577637, "learning_rate": 1.7783103448275862e-06, "loss": 0.204, "step": 196950 }, { "epoch": 1.94, "grad_norm": 3.249039888381958, "learning_rate": 1.7778793103448278e-06, "loss": 0.2085, "step": 196975 }, { "epoch": 1.94, "grad_norm": 9.655837059020996, "learning_rate": 1.777448275862069e-06, "loss": 0.1575, "step": 197000 }, { "epoch": 1.94, "grad_norm": 3.9592690467834473, "learning_rate": 1.7770172413793105e-06, "loss": 0.2491, "step": 197025 }, { "epoch": 1.94, "grad_norm": 1.4415152072906494, "learning_rate": 1.776586206896552e-06, "loss": 0.1132, "step": 197050 }, { "epoch": 1.94, "grad_norm": 6.201278209686279, "learning_rate": 1.7761551724137932e-06, "loss": 0.2564, "step": 197075 }, { "epoch": 1.94, "grad_norm": 7.663712501525879, "learning_rate": 1.7757241379310347e-06, "loss": 0.1132, "step": 197100 }, { "epoch": 1.94, "grad_norm": 4.945854187011719, "learning_rate": 1.775293103448276e-06, "loss": 0.2778, "step": 197125 }, { "epoch": 1.94, "grad_norm": 5.0465497970581055, "learning_rate": 1.7748620689655174e-06, "loss": 0.1342, "step": 197150 }, { "epoch": 1.94, "grad_norm": 5.28085470199585, "learning_rate": 1.7744310344827586e-06, "loss": 0.2392, "step": 197175 }, { "epoch": 1.94, "grad_norm": 6.174591541290283, "learning_rate": 1.774e-06, "loss": 0.1486, "step": 197200 }, { "epoch": 1.94, "grad_norm": 4.381877899169922, "learning_rate": 1.7735689655172413e-06, "loss": 0.2649, "step": 197225 }, { "epoch": 1.94, "grad_norm": 10.470379829406738, "learning_rate": 1.773137931034483e-06, "loss": 0.143, "step": 197250 }, { "epoch": 1.94, "grad_norm": 5.722863674163818, "learning_rate": 1.7727068965517245e-06, "loss": 0.2871, "step": 197275 }, { "epoch": 1.94, "grad_norm": 7.791528224945068, "learning_rate": 1.7722758620689657e-06, "loss": 0.0817, "step": 197300 }, { "epoch": 1.94, "grad_norm": 4.661403179168701, "learning_rate": 1.7718448275862072e-06, "loss": 0.2702, "step": 197325 }, { "epoch": 1.94, "grad_norm": 7.676939487457275, "learning_rate": 1.7714137931034484e-06, "loss": 0.1228, "step": 197350 }, { "epoch": 1.94, "grad_norm": 5.983809947967529, "learning_rate": 1.7709827586206899e-06, "loss": 0.218, "step": 197375 }, { "epoch": 1.94, "grad_norm": 12.18900203704834, "learning_rate": 1.770551724137931e-06, "loss": 0.1498, "step": 197400 }, { "epoch": 1.94, "grad_norm": 3.904869794845581, "learning_rate": 1.7701206896551726e-06, "loss": 0.2602, "step": 197425 }, { "epoch": 1.94, "grad_norm": 13.286576271057129, "learning_rate": 1.7696896551724138e-06, "loss": 0.1563, "step": 197450 }, { "epoch": 1.94, "grad_norm": 5.8406662940979, "learning_rate": 1.7692586206896553e-06, "loss": 0.2642, "step": 197475 }, { "epoch": 1.94, "grad_norm": 6.762943267822266, "learning_rate": 1.7688275862068967e-06, "loss": 0.1449, "step": 197500 }, { "epoch": 1.94, "grad_norm": 3.8551723957061768, "learning_rate": 1.768396551724138e-06, "loss": 0.2625, "step": 197525 }, { "epoch": 1.94, "grad_norm": 11.279714584350586, "learning_rate": 1.7679655172413796e-06, "loss": 0.1355, "step": 197550 }, { "epoch": 1.94, "grad_norm": 4.019796371459961, "learning_rate": 1.7675344827586209e-06, "loss": 0.2379, "step": 197575 }, { "epoch": 1.94, "grad_norm": 8.16982364654541, "learning_rate": 1.7671034482758623e-06, "loss": 0.1343, "step": 197600 }, { "epoch": 1.94, "grad_norm": 4.703131675720215, "learning_rate": 1.7666724137931036e-06, "loss": 0.2719, "step": 197625 }, { "epoch": 1.94, "grad_norm": 6.161951065063477, "learning_rate": 1.766241379310345e-06, "loss": 0.1548, "step": 197650 }, { "epoch": 1.94, "grad_norm": 10.319600105285645, "learning_rate": 1.7658103448275863e-06, "loss": 0.298, "step": 197675 }, { "epoch": 1.94, "grad_norm": 13.040425300598145, "learning_rate": 1.7653793103448277e-06, "loss": 0.1611, "step": 197700 }, { "epoch": 1.94, "grad_norm": 4.444908142089844, "learning_rate": 1.7649482758620692e-06, "loss": 0.2276, "step": 197725 }, { "epoch": 1.94, "grad_norm": 9.976668357849121, "learning_rate": 1.7645172413793104e-06, "loss": 0.1558, "step": 197750 }, { "epoch": 1.95, "grad_norm": 5.103799819946289, "learning_rate": 1.7640862068965519e-06, "loss": 0.3164, "step": 197775 }, { "epoch": 1.95, "grad_norm": 8.852099418640137, "learning_rate": 1.7636551724137931e-06, "loss": 0.1189, "step": 197800 }, { "epoch": 1.95, "grad_norm": 4.2609052658081055, "learning_rate": 1.763241379310345e-06, "loss": 0.2813, "step": 197825 }, { "epoch": 1.95, "grad_norm": 7.107535362243652, "learning_rate": 1.7628103448275863e-06, "loss": 0.1501, "step": 197850 }, { "epoch": 1.95, "grad_norm": 4.865103721618652, "learning_rate": 1.7623793103448277e-06, "loss": 0.3022, "step": 197875 }, { "epoch": 1.95, "grad_norm": 9.170435905456543, "learning_rate": 1.761948275862069e-06, "loss": 0.136, "step": 197900 }, { "epoch": 1.95, "grad_norm": 6.8457255363464355, "learning_rate": 1.7615172413793104e-06, "loss": 0.2979, "step": 197925 }, { "epoch": 1.95, "grad_norm": 8.480695724487305, "learning_rate": 1.7610862068965517e-06, "loss": 0.1602, "step": 197950 }, { "epoch": 1.95, "grad_norm": 5.472748279571533, "learning_rate": 1.7606551724137933e-06, "loss": 0.2429, "step": 197975 }, { "epoch": 1.95, "grad_norm": 3.722736120223999, "learning_rate": 1.7602241379310348e-06, "loss": 0.1129, "step": 198000 }, { "epoch": 1.95, "grad_norm": 4.003249645233154, "learning_rate": 1.759793103448276e-06, "loss": 0.2436, "step": 198025 }, { "epoch": 1.95, "grad_norm": 6.519516468048096, "learning_rate": 1.7593620689655175e-06, "loss": 0.1403, "step": 198050 }, { "epoch": 1.95, "grad_norm": 3.1265366077423096, "learning_rate": 1.7589310344827587e-06, "loss": 0.2784, "step": 198075 }, { "epoch": 1.95, "grad_norm": 23.072406768798828, "learning_rate": 1.7585000000000002e-06, "loss": 0.1549, "step": 198100 }, { "epoch": 1.95, "grad_norm": 5.155007839202881, "learning_rate": 1.7580689655172414e-06, "loss": 0.2947, "step": 198125 }, { "epoch": 1.95, "grad_norm": 4.327389717102051, "learning_rate": 1.7576379310344829e-06, "loss": 0.1192, "step": 198150 }, { "epoch": 1.95, "grad_norm": 4.8151140213012695, "learning_rate": 1.7572068965517241e-06, "loss": 0.2532, "step": 198175 }, { "epoch": 1.95, "grad_norm": 4.027538776397705, "learning_rate": 1.7567758620689656e-06, "loss": 0.171, "step": 198200 }, { "epoch": 1.95, "grad_norm": 7.407492637634277, "learning_rate": 1.7563448275862073e-06, "loss": 0.2955, "step": 198225 }, { "epoch": 1.95, "grad_norm": 5.602211952209473, "learning_rate": 1.7559137931034483e-06, "loss": 0.13, "step": 198250 }, { "epoch": 1.95, "grad_norm": 6.165182113647461, "learning_rate": 1.75548275862069e-06, "loss": 0.2808, "step": 198275 }, { "epoch": 1.95, "grad_norm": 6.725157737731934, "learning_rate": 1.7550517241379312e-06, "loss": 0.1324, "step": 198300 }, { "epoch": 1.95, "grad_norm": 3.579603672027588, "learning_rate": 1.7546206896551727e-06, "loss": 0.2627, "step": 198325 }, { "epoch": 1.95, "grad_norm": 7.893467426300049, "learning_rate": 1.754189655172414e-06, "loss": 0.128, "step": 198350 }, { "epoch": 1.95, "grad_norm": 3.7173550128936768, "learning_rate": 1.7537586206896554e-06, "loss": 0.2832, "step": 198375 }, { "epoch": 1.95, "grad_norm": 4.59114933013916, "learning_rate": 1.7533275862068966e-06, "loss": 0.1523, "step": 198400 }, { "epoch": 1.95, "grad_norm": 7.0193963050842285, "learning_rate": 1.752896551724138e-06, "loss": 0.2796, "step": 198425 }, { "epoch": 1.95, "grad_norm": 10.86898422241211, "learning_rate": 1.7524655172413795e-06, "loss": 0.166, "step": 198450 }, { "epoch": 1.95, "grad_norm": 3.936497449874878, "learning_rate": 1.7520344827586208e-06, "loss": 0.2366, "step": 198475 }, { "epoch": 1.95, "grad_norm": 8.15971565246582, "learning_rate": 1.7516034482758622e-06, "loss": 0.1198, "step": 198500 }, { "epoch": 1.95, "grad_norm": 4.5225958824157715, "learning_rate": 1.7511724137931035e-06, "loss": 0.2444, "step": 198525 }, { "epoch": 1.95, "grad_norm": 7.118875980377197, "learning_rate": 1.7507413793103451e-06, "loss": 0.1235, "step": 198550 }, { "epoch": 1.95, "grad_norm": 4.316490650177002, "learning_rate": 1.7503103448275862e-06, "loss": 0.306, "step": 198575 }, { "epoch": 1.95, "grad_norm": 9.262860298156738, "learning_rate": 1.7498793103448278e-06, "loss": 0.1621, "step": 198600 }, { "epoch": 1.95, "grad_norm": 3.6791210174560547, "learning_rate": 1.749448275862069e-06, "loss": 0.2302, "step": 198625 }, { "epoch": 1.95, "grad_norm": 11.22046947479248, "learning_rate": 1.7490172413793105e-06, "loss": 0.11, "step": 198650 }, { "epoch": 1.95, "grad_norm": 3.1753089427948, "learning_rate": 1.7485862068965518e-06, "loss": 0.2966, "step": 198675 }, { "epoch": 1.95, "grad_norm": 9.269630432128906, "learning_rate": 1.7481551724137932e-06, "loss": 0.1394, "step": 198700 }, { "epoch": 1.95, "grad_norm": 5.346479892730713, "learning_rate": 1.7477241379310347e-06, "loss": 0.2371, "step": 198725 }, { "epoch": 1.95, "grad_norm": 8.180038452148438, "learning_rate": 1.747293103448276e-06, "loss": 0.1503, "step": 198750 }, { "epoch": 1.96, "grad_norm": 3.8867604732513428, "learning_rate": 1.7468620689655174e-06, "loss": 0.2372, "step": 198775 }, { "epoch": 1.96, "grad_norm": 17.55785369873047, "learning_rate": 1.7464310344827586e-06, "loss": 0.1723, "step": 198800 }, { "epoch": 1.96, "grad_norm": 6.518655776977539, "learning_rate": 1.746e-06, "loss": 0.2699, "step": 198825 }, { "epoch": 1.96, "grad_norm": 9.185893058776855, "learning_rate": 1.7455689655172413e-06, "loss": 0.1521, "step": 198850 }, { "epoch": 1.96, "grad_norm": 7.230334281921387, "learning_rate": 1.745137931034483e-06, "loss": 0.3126, "step": 198875 }, { "epoch": 1.96, "grad_norm": 6.546151638031006, "learning_rate": 1.7447068965517243e-06, "loss": 0.1516, "step": 198900 }, { "epoch": 1.96, "grad_norm": 3.9990882873535156, "learning_rate": 1.7442758620689657e-06, "loss": 0.2837, "step": 198925 }, { "epoch": 1.96, "grad_norm": 14.54367446899414, "learning_rate": 1.7438448275862072e-06, "loss": 0.1672, "step": 198950 }, { "epoch": 1.96, "grad_norm": 4.0110907554626465, "learning_rate": 1.7434137931034484e-06, "loss": 0.2723, "step": 198975 }, { "epoch": 1.96, "grad_norm": 7.215005397796631, "learning_rate": 1.7429827586206899e-06, "loss": 0.1392, "step": 199000 }, { "epoch": 1.96, "grad_norm": 4.7032670974731445, "learning_rate": 1.7425517241379311e-06, "loss": 0.264, "step": 199025 }, { "epoch": 1.96, "grad_norm": 5.880415916442871, "learning_rate": 1.7421206896551726e-06, "loss": 0.1397, "step": 199050 }, { "epoch": 1.96, "grad_norm": 5.409276008605957, "learning_rate": 1.7416896551724138e-06, "loss": 0.2764, "step": 199075 }, { "epoch": 1.96, "grad_norm": 8.736837387084961, "learning_rate": 1.7412586206896553e-06, "loss": 0.1574, "step": 199100 }, { "epoch": 1.96, "grad_norm": 3.646768569946289, "learning_rate": 1.7408275862068965e-06, "loss": 0.2825, "step": 199125 }, { "epoch": 1.96, "grad_norm": 5.094798564910889, "learning_rate": 1.740396551724138e-06, "loss": 0.1339, "step": 199150 }, { "epoch": 1.96, "grad_norm": 12.224867820739746, "learning_rate": 1.7399655172413796e-06, "loss": 0.2714, "step": 199175 }, { "epoch": 1.96, "grad_norm": 8.133588790893555, "learning_rate": 1.7395344827586209e-06, "loss": 0.1232, "step": 199200 }, { "epoch": 1.96, "grad_norm": 5.151493549346924, "learning_rate": 1.7391034482758623e-06, "loss": 0.2561, "step": 199225 }, { "epoch": 1.96, "grad_norm": 7.139684200286865, "learning_rate": 1.7386724137931036e-06, "loss": 0.15, "step": 199250 }, { "epoch": 1.96, "grad_norm": 4.524624347686768, "learning_rate": 1.738241379310345e-06, "loss": 0.3165, "step": 199275 }, { "epoch": 1.96, "grad_norm": 10.145092964172363, "learning_rate": 1.7378103448275863e-06, "loss": 0.1419, "step": 199300 }, { "epoch": 1.96, "grad_norm": 3.825131416320801, "learning_rate": 1.7373793103448277e-06, "loss": 0.2947, "step": 199325 }, { "epoch": 1.96, "grad_norm": 7.235652923583984, "learning_rate": 1.736948275862069e-06, "loss": 0.1293, "step": 199350 }, { "epoch": 1.96, "grad_norm": 4.049701690673828, "learning_rate": 1.7365172413793104e-06, "loss": 0.2691, "step": 199375 }, { "epoch": 1.96, "grad_norm": 7.850808620452881, "learning_rate": 1.736086206896552e-06, "loss": 0.127, "step": 199400 }, { "epoch": 1.96, "grad_norm": 3.807176113128662, "learning_rate": 1.7356551724137931e-06, "loss": 0.2323, "step": 199425 }, { "epoch": 1.96, "grad_norm": 11.744002342224121, "learning_rate": 1.7352241379310348e-06, "loss": 0.1491, "step": 199450 }, { "epoch": 1.96, "grad_norm": 4.250727653503418, "learning_rate": 1.7347931034482758e-06, "loss": 0.2383, "step": 199475 }, { "epoch": 1.96, "grad_norm": 6.067199230194092, "learning_rate": 1.7343620689655175e-06, "loss": 0.1402, "step": 199500 }, { "epoch": 1.96, "grad_norm": 12.458683967590332, "learning_rate": 1.7339310344827588e-06, "loss": 0.2842, "step": 199525 }, { "epoch": 1.96, "grad_norm": 8.877060890197754, "learning_rate": 1.7335000000000002e-06, "loss": 0.1556, "step": 199550 }, { "epoch": 1.96, "grad_norm": 4.733433723449707, "learning_rate": 1.7330689655172415e-06, "loss": 0.3082, "step": 199575 }, { "epoch": 1.96, "grad_norm": 6.815999984741211, "learning_rate": 1.732637931034483e-06, "loss": 0.1702, "step": 199600 }, { "epoch": 1.96, "grad_norm": 4.775666236877441, "learning_rate": 1.7322068965517244e-06, "loss": 0.2668, "step": 199625 }, { "epoch": 1.96, "grad_norm": 8.308064460754395, "learning_rate": 1.7317758620689656e-06, "loss": 0.1435, "step": 199650 }, { "epoch": 1.96, "grad_norm": 5.812121391296387, "learning_rate": 1.731344827586207e-06, "loss": 0.279, "step": 199675 }, { "epoch": 1.96, "grad_norm": 8.769172668457031, "learning_rate": 1.7309137931034483e-06, "loss": 0.1488, "step": 199700 }, { "epoch": 1.96, "grad_norm": 3.9042422771453857, "learning_rate": 1.7304827586206898e-06, "loss": 0.2361, "step": 199725 }, { "epoch": 1.96, "grad_norm": 10.592672348022461, "learning_rate": 1.730051724137931e-06, "loss": 0.1354, "step": 199750 }, { "epoch": 1.96, "grad_norm": 3.8691492080688477, "learning_rate": 1.7296206896551727e-06, "loss": 0.2534, "step": 199775 }, { "epoch": 1.97, "grad_norm": 7.7545061111450195, "learning_rate": 1.7291896551724137e-06, "loss": 0.1332, "step": 199800 }, { "epoch": 1.97, "grad_norm": 5.666599750518799, "learning_rate": 1.7287758620689656e-06, "loss": 0.2574, "step": 199825 }, { "epoch": 1.97, "grad_norm": 8.890854835510254, "learning_rate": 1.7283448275862068e-06, "loss": 0.1222, "step": 199850 }, { "epoch": 1.97, "grad_norm": 4.745638370513916, "learning_rate": 1.7279137931034485e-06, "loss": 0.2566, "step": 199875 }, { "epoch": 1.97, "grad_norm": 7.203958511352539, "learning_rate": 1.72748275862069e-06, "loss": 0.1128, "step": 199900 }, { "epoch": 1.97, "grad_norm": 5.91631555557251, "learning_rate": 1.7270517241379312e-06, "loss": 0.292, "step": 199925 }, { "epoch": 1.97, "grad_norm": 8.888168334960938, "learning_rate": 1.7266206896551727e-06, "loss": 0.1478, "step": 199950 }, { "epoch": 1.97, "grad_norm": 4.43589448928833, "learning_rate": 1.726189655172414e-06, "loss": 0.2388, "step": 199975 }, { "epoch": 1.97, "grad_norm": 12.950846672058105, "learning_rate": 1.7257586206896554e-06, "loss": 0.1525, "step": 200000 }, { "epoch": 1.97, "eval_loss": 0.4856625199317932, "eval_runtime": 5737.2014, "eval_samples_per_second": 1.65, "eval_steps_per_second": 0.206, "eval_wer": 0.11940706882121933, "step": 200000 }, { "epoch": 1.97, "grad_norm": 3.484098196029663, "learning_rate": 1.7253275862068966e-06, "loss": 0.2751, "step": 200025 }, { "epoch": 1.97, "grad_norm": 7.218255996704102, "learning_rate": 1.724896551724138e-06, "loss": 0.1627, "step": 200050 }, { "epoch": 1.97, "grad_norm": 4.880028247833252, "learning_rate": 1.7244655172413793e-06, "loss": 0.303, "step": 200075 }, { "epoch": 1.97, "grad_norm": 7.093042850494385, "learning_rate": 1.7240344827586208e-06, "loss": 0.152, "step": 200100 }, { "epoch": 1.97, "grad_norm": 3.700575590133667, "learning_rate": 1.7236034482758624e-06, "loss": 0.3067, "step": 200125 }, { "epoch": 1.97, "grad_norm": 14.139238357543945, "learning_rate": 1.7231724137931035e-06, "loss": 0.1591, "step": 200150 }, { "epoch": 1.97, "grad_norm": 6.008151054382324, "learning_rate": 1.7227413793103451e-06, "loss": 0.25, "step": 200175 }, { "epoch": 1.97, "grad_norm": 10.483464241027832, "learning_rate": 1.7223103448275864e-06, "loss": 0.172, "step": 200200 }, { "epoch": 1.97, "grad_norm": 3.184581756591797, "learning_rate": 1.7218793103448278e-06, "loss": 0.2605, "step": 200225 }, { "epoch": 1.97, "grad_norm": 11.398233413696289, "learning_rate": 1.721448275862069e-06, "loss": 0.1477, "step": 200250 }, { "epoch": 1.97, "grad_norm": 6.349982738494873, "learning_rate": 1.7210172413793106e-06, "loss": 0.2766, "step": 200275 }, { "epoch": 1.97, "grad_norm": 6.085355281829834, "learning_rate": 1.7205862068965518e-06, "loss": 0.1158, "step": 200300 }, { "epoch": 1.97, "grad_norm": 3.826913833618164, "learning_rate": 1.7201551724137933e-06, "loss": 0.2448, "step": 200325 }, { "epoch": 1.97, "grad_norm": 1.5685410499572754, "learning_rate": 1.7197241379310345e-06, "loss": 0.1257, "step": 200350 }, { "epoch": 1.97, "grad_norm": 4.204770088195801, "learning_rate": 1.719293103448276e-06, "loss": 0.2443, "step": 200375 }, { "epoch": 1.97, "grad_norm": 11.235437393188477, "learning_rate": 1.7188620689655174e-06, "loss": 0.1223, "step": 200400 }, { "epoch": 1.97, "grad_norm": 4.576160907745361, "learning_rate": 1.7184310344827587e-06, "loss": 0.2541, "step": 200425 }, { "epoch": 1.97, "grad_norm": 6.3603515625, "learning_rate": 1.7180000000000003e-06, "loss": 0.145, "step": 200450 }, { "epoch": 1.97, "grad_norm": 5.360896587371826, "learning_rate": 1.7175689655172414e-06, "loss": 0.2778, "step": 200475 }, { "epoch": 1.97, "grad_norm": 7.1524977684021, "learning_rate": 1.717137931034483e-06, "loss": 0.148, "step": 200500 }, { "epoch": 1.97, "grad_norm": 5.039687633514404, "learning_rate": 1.7167068965517243e-06, "loss": 0.2984, "step": 200525 }, { "epoch": 1.97, "grad_norm": 4.221446514129639, "learning_rate": 1.7162758620689657e-06, "loss": 0.1232, "step": 200550 }, { "epoch": 1.97, "grad_norm": 5.090327739715576, "learning_rate": 1.715844827586207e-06, "loss": 0.2673, "step": 200575 }, { "epoch": 1.97, "grad_norm": 8.244966506958008, "learning_rate": 1.7154137931034484e-06, "loss": 0.1331, "step": 200600 }, { "epoch": 1.97, "grad_norm": 5.106537818908691, "learning_rate": 1.7149827586206899e-06, "loss": 0.2974, "step": 200625 }, { "epoch": 1.97, "grad_norm": 11.237733840942383, "learning_rate": 1.7145517241379311e-06, "loss": 0.1369, "step": 200650 }, { "epoch": 1.97, "grad_norm": 3.0577120780944824, "learning_rate": 1.7141206896551726e-06, "loss": 0.2317, "step": 200675 }, { "epoch": 1.97, "grad_norm": 7.593776702880859, "learning_rate": 1.7136896551724138e-06, "loss": 0.147, "step": 200700 }, { "epoch": 1.97, "grad_norm": 5.298450946807861, "learning_rate": 1.7132586206896553e-06, "loss": 0.2911, "step": 200725 }, { "epoch": 1.97, "grad_norm": 1.377912163734436, "learning_rate": 1.7128275862068965e-06, "loss": 0.1829, "step": 200750 }, { "epoch": 1.97, "grad_norm": 5.097541809082031, "learning_rate": 1.7123965517241382e-06, "loss": 0.2578, "step": 200775 }, { "epoch": 1.97, "grad_norm": 8.297704696655273, "learning_rate": 1.7119655172413792e-06, "loss": 0.164, "step": 200800 }, { "epoch": 1.98, "grad_norm": 5.483388900756836, "learning_rate": 1.711534482758621e-06, "loss": 0.2512, "step": 200825 }, { "epoch": 1.98, "grad_norm": 3.8833820819854736, "learning_rate": 1.7111034482758624e-06, "loss": 0.1493, "step": 200850 }, { "epoch": 1.98, "grad_norm": 3.988990068435669, "learning_rate": 1.7106724137931036e-06, "loss": 0.2447, "step": 200875 }, { "epoch": 1.98, "grad_norm": 10.148354530334473, "learning_rate": 1.710241379310345e-06, "loss": 0.1181, "step": 200900 }, { "epoch": 1.98, "grad_norm": 4.902710914611816, "learning_rate": 1.7098103448275863e-06, "loss": 0.2967, "step": 200925 }, { "epoch": 1.98, "grad_norm": 9.682073593139648, "learning_rate": 1.7093793103448278e-06, "loss": 0.1526, "step": 200950 }, { "epoch": 1.98, "grad_norm": 4.732014179229736, "learning_rate": 1.708948275862069e-06, "loss": 0.2699, "step": 200975 }, { "epoch": 1.98, "grad_norm": 3.89726185798645, "learning_rate": 1.7085172413793105e-06, "loss": 0.1379, "step": 201000 }, { "epoch": 1.98, "grad_norm": 5.17404842376709, "learning_rate": 1.7080862068965517e-06, "loss": 0.3104, "step": 201025 }, { "epoch": 1.98, "grad_norm": 2.6378159523010254, "learning_rate": 1.7076551724137932e-06, "loss": 0.154, "step": 201050 }, { "epoch": 1.98, "grad_norm": 4.696566581726074, "learning_rate": 1.7072241379310348e-06, "loss": 0.2851, "step": 201075 }, { "epoch": 1.98, "grad_norm": 10.023045539855957, "learning_rate": 1.706793103448276e-06, "loss": 0.1643, "step": 201100 }, { "epoch": 1.98, "grad_norm": 5.420064449310303, "learning_rate": 1.7063620689655175e-06, "loss": 0.2624, "step": 201125 }, { "epoch": 1.98, "grad_norm": 5.369200229644775, "learning_rate": 1.7059310344827588e-06, "loss": 0.1564, "step": 201150 }, { "epoch": 1.98, "grad_norm": 4.932041168212891, "learning_rate": 1.7055000000000002e-06, "loss": 0.2734, "step": 201175 }, { "epoch": 1.98, "grad_norm": 6.286355018615723, "learning_rate": 1.7050689655172415e-06, "loss": 0.1639, "step": 201200 }, { "epoch": 1.98, "grad_norm": 4.009503364562988, "learning_rate": 1.704637931034483e-06, "loss": 0.2707, "step": 201225 }, { "epoch": 1.98, "grad_norm": 8.977554321289062, "learning_rate": 1.7042068965517242e-06, "loss": 0.1219, "step": 201250 }, { "epoch": 1.98, "grad_norm": 4.311797142028809, "learning_rate": 1.7037758620689656e-06, "loss": 0.2703, "step": 201275 }, { "epoch": 1.98, "grad_norm": 5.106343746185303, "learning_rate": 1.703344827586207e-06, "loss": 0.1718, "step": 201300 }, { "epoch": 1.98, "grad_norm": 6.3236517906188965, "learning_rate": 1.7029137931034483e-06, "loss": 0.2909, "step": 201325 }, { "epoch": 1.98, "grad_norm": 15.216452598571777, "learning_rate": 1.70248275862069e-06, "loss": 0.1444, "step": 201350 }, { "epoch": 1.98, "grad_norm": 3.979546546936035, "learning_rate": 1.702051724137931e-06, "loss": 0.2466, "step": 201375 }, { "epoch": 1.98, "grad_norm": 12.561139106750488, "learning_rate": 1.7016206896551727e-06, "loss": 0.1122, "step": 201400 }, { "epoch": 1.98, "grad_norm": 3.934603691101074, "learning_rate": 1.701189655172414e-06, "loss": 0.251, "step": 201425 }, { "epoch": 1.98, "grad_norm": 5.798051357269287, "learning_rate": 1.7007586206896554e-06, "loss": 0.1387, "step": 201450 }, { "epoch": 1.98, "grad_norm": 4.836248874664307, "learning_rate": 1.7003275862068966e-06, "loss": 0.2426, "step": 201475 }, { "epoch": 1.98, "grad_norm": 6.150472640991211, "learning_rate": 1.699896551724138e-06, "loss": 0.1339, "step": 201500 }, { "epoch": 1.98, "grad_norm": 3.4258432388305664, "learning_rate": 1.6994655172413794e-06, "loss": 0.2643, "step": 201525 }, { "epoch": 1.98, "grad_norm": 9.50852108001709, "learning_rate": 1.6990344827586208e-06, "loss": 0.1476, "step": 201550 }, { "epoch": 1.98, "grad_norm": 3.767383575439453, "learning_rate": 1.6986034482758623e-06, "loss": 0.2496, "step": 201575 }, { "epoch": 1.98, "grad_norm": 1.5324320793151855, "learning_rate": 1.6981724137931035e-06, "loss": 0.1202, "step": 201600 }, { "epoch": 1.98, "grad_norm": 3.7156317234039307, "learning_rate": 1.697741379310345e-06, "loss": 0.2566, "step": 201625 }, { "epoch": 1.98, "grad_norm": 8.808755874633789, "learning_rate": 1.6973103448275862e-06, "loss": 0.1332, "step": 201650 }, { "epoch": 1.98, "grad_norm": 4.507904052734375, "learning_rate": 1.6968793103448279e-06, "loss": 0.2692, "step": 201675 }, { "epoch": 1.98, "grad_norm": 7.6702117919921875, "learning_rate": 1.696448275862069e-06, "loss": 0.1724, "step": 201700 }, { "epoch": 1.98, "grad_norm": 5.826809406280518, "learning_rate": 1.6960172413793106e-06, "loss": 0.2835, "step": 201725 }, { "epoch": 1.98, "grad_norm": 11.502119064331055, "learning_rate": 1.6955862068965518e-06, "loss": 0.1385, "step": 201750 }, { "epoch": 1.98, "grad_norm": 5.040323257446289, "learning_rate": 1.6951551724137933e-06, "loss": 0.2488, "step": 201775 }, { "epoch": 1.98, "grad_norm": 7.801357269287109, "learning_rate": 1.6947241379310347e-06, "loss": 0.1265, "step": 201800 }, { "epoch": 1.99, "grad_norm": 3.950183153152466, "learning_rate": 1.694293103448276e-06, "loss": 0.2583, "step": 201825 }, { "epoch": 1.99, "grad_norm": 3.1261274814605713, "learning_rate": 1.6938620689655174e-06, "loss": 0.1422, "step": 201850 }, { "epoch": 1.99, "grad_norm": 4.748574733734131, "learning_rate": 1.6934310344827587e-06, "loss": 0.2705, "step": 201875 }, { "epoch": 1.99, "grad_norm": 7.182480812072754, "learning_rate": 1.6930000000000001e-06, "loss": 0.1373, "step": 201900 }, { "epoch": 1.99, "grad_norm": 5.014307498931885, "learning_rate": 1.6925689655172414e-06, "loss": 0.2348, "step": 201925 }, { "epoch": 1.99, "grad_norm": 8.246784210205078, "learning_rate": 1.6921379310344828e-06, "loss": 0.1642, "step": 201950 }, { "epoch": 1.99, "grad_norm": 5.680517196655273, "learning_rate": 1.691706896551724e-06, "loss": 0.2531, "step": 201975 }, { "epoch": 1.99, "grad_norm": 6.518174648284912, "learning_rate": 1.6912758620689658e-06, "loss": 0.1636, "step": 202000 }, { "epoch": 1.99, "grad_norm": 5.738340377807617, "learning_rate": 1.6908448275862072e-06, "loss": 0.3411, "step": 202025 }, { "epoch": 1.99, "grad_norm": 7.943357467651367, "learning_rate": 1.6904137931034485e-06, "loss": 0.1504, "step": 202050 }, { "epoch": 1.99, "grad_norm": 4.531459808349609, "learning_rate": 1.68998275862069e-06, "loss": 0.2821, "step": 202075 }, { "epoch": 1.99, "grad_norm": 5.9574174880981445, "learning_rate": 1.6895517241379312e-06, "loss": 0.1551, "step": 202100 }, { "epoch": 1.99, "grad_norm": 3.883618116378784, "learning_rate": 1.689137931034483e-06, "loss": 0.279, "step": 202125 }, { "epoch": 1.99, "grad_norm": 5.388757705688477, "learning_rate": 1.6887068965517243e-06, "loss": 0.1432, "step": 202150 }, { "epoch": 1.99, "grad_norm": 5.070071697235107, "learning_rate": 1.6882758620689657e-06, "loss": 0.2584, "step": 202175 }, { "epoch": 1.99, "grad_norm": 10.643110275268555, "learning_rate": 1.687844827586207e-06, "loss": 0.1413, "step": 202200 }, { "epoch": 1.99, "grad_norm": 4.407582759857178, "learning_rate": 1.6874137931034484e-06, "loss": 0.3222, "step": 202225 }, { "epoch": 1.99, "grad_norm": 13.201109886169434, "learning_rate": 1.6869827586206897e-06, "loss": 0.1664, "step": 202250 }, { "epoch": 1.99, "grad_norm": 7.796724319458008, "learning_rate": 1.6865517241379311e-06, "loss": 0.2288, "step": 202275 }, { "epoch": 1.99, "grad_norm": 7.328008651733398, "learning_rate": 1.6861206896551726e-06, "loss": 0.1534, "step": 202300 }, { "epoch": 1.99, "grad_norm": 2.7424936294555664, "learning_rate": 1.6856896551724138e-06, "loss": 0.2979, "step": 202325 }, { "epoch": 1.99, "grad_norm": 6.906245708465576, "learning_rate": 1.6852586206896553e-06, "loss": 0.1513, "step": 202350 }, { "epoch": 1.99, "grad_norm": 5.115962982177734, "learning_rate": 1.6848275862068965e-06, "loss": 0.235, "step": 202375 }, { "epoch": 1.99, "grad_norm": 6.274560928344727, "learning_rate": 1.6843965517241382e-06, "loss": 0.163, "step": 202400 }, { "epoch": 1.99, "grad_norm": 3.353764057159424, "learning_rate": 1.6839655172413795e-06, "loss": 0.2184, "step": 202425 }, { "epoch": 1.99, "grad_norm": 15.055291175842285, "learning_rate": 1.683534482758621e-06, "loss": 0.1168, "step": 202450 }, { "epoch": 1.99, "grad_norm": 5.7039475440979, "learning_rate": 1.6831034482758622e-06, "loss": 0.2347, "step": 202475 }, { "epoch": 1.99, "grad_norm": 6.465416431427002, "learning_rate": 1.6826724137931036e-06, "loss": 0.1455, "step": 202500 }, { "epoch": 1.99, "grad_norm": 5.659328937530518, "learning_rate": 1.682241379310345e-06, "loss": 0.2375, "step": 202525 }, { "epoch": 1.99, "grad_norm": 10.259611129760742, "learning_rate": 1.6818103448275863e-06, "loss": 0.0956, "step": 202550 }, { "epoch": 1.99, "grad_norm": 3.61875319480896, "learning_rate": 1.6813793103448278e-06, "loss": 0.26, "step": 202575 }, { "epoch": 1.99, "grad_norm": 3.182036876678467, "learning_rate": 1.680948275862069e-06, "loss": 0.1415, "step": 202600 }, { "epoch": 1.99, "grad_norm": 6.758996486663818, "learning_rate": 1.6805172413793105e-06, "loss": 0.2661, "step": 202625 }, { "epoch": 1.99, "grad_norm": 31.634117126464844, "learning_rate": 1.6800862068965517e-06, "loss": 0.1056, "step": 202650 }, { "epoch": 1.99, "grad_norm": 5.5407328605651855, "learning_rate": 1.6796551724137932e-06, "loss": 0.2483, "step": 202675 }, { "epoch": 1.99, "grad_norm": 6.208924770355225, "learning_rate": 1.6792241379310344e-06, "loss": 0.1189, "step": 202700 }, { "epoch": 1.99, "grad_norm": 3.7468252182006836, "learning_rate": 1.678793103448276e-06, "loss": 0.2678, "step": 202725 }, { "epoch": 1.99, "grad_norm": 2.7842977046966553, "learning_rate": 1.6783620689655175e-06, "loss": 0.1195, "step": 202750 }, { "epoch": 1.99, "grad_norm": 5.739307880401611, "learning_rate": 1.6779310344827588e-06, "loss": 0.2788, "step": 202775 }, { "epoch": 1.99, "grad_norm": 10.530017852783203, "learning_rate": 1.6775000000000002e-06, "loss": 0.1448, "step": 202800 }, { "epoch": 1.99, "grad_norm": 5.2388014793396, "learning_rate": 1.6770689655172415e-06, "loss": 0.2536, "step": 202825 }, { "epoch": 2.0, "grad_norm": 8.113033294677734, "learning_rate": 1.676637931034483e-06, "loss": 0.1818, "step": 202850 }, { "epoch": 2.0, "grad_norm": 3.532263994216919, "learning_rate": 1.6762068965517242e-06, "loss": 0.2838, "step": 202875 }, { "epoch": 2.0, "grad_norm": 5.386137962341309, "learning_rate": 1.6757758620689656e-06, "loss": 0.1376, "step": 202900 }, { "epoch": 2.0, "grad_norm": 7.018639087677002, "learning_rate": 1.6753448275862069e-06, "loss": 0.265, "step": 202925 }, { "epoch": 2.0, "grad_norm": 9.287501335144043, "learning_rate": 1.6749137931034483e-06, "loss": 0.1442, "step": 202950 }, { "epoch": 2.0, "grad_norm": 6.200946807861328, "learning_rate": 1.67448275862069e-06, "loss": 0.2997, "step": 202975 }, { "epoch": 2.0, "grad_norm": 1.4370486736297607, "learning_rate": 1.6740517241379313e-06, "loss": 0.1184, "step": 203000 }, { "epoch": 2.0, "grad_norm": 5.972302436828613, "learning_rate": 1.6736206896551727e-06, "loss": 0.2957, "step": 203025 }, { "epoch": 2.0, "grad_norm": 4.790600299835205, "learning_rate": 1.673189655172414e-06, "loss": 0.1364, "step": 203050 }, { "epoch": 2.0, "grad_norm": 4.846665382385254, "learning_rate": 1.6727586206896554e-06, "loss": 0.3632, "step": 203075 }, { "epoch": 2.0, "grad_norm": 6.770423889160156, "learning_rate": 1.6723275862068967e-06, "loss": 0.1389, "step": 203100 }, { "epoch": 2.0, "grad_norm": 3.4634504318237305, "learning_rate": 1.6718965517241381e-06, "loss": 0.2439, "step": 203125 }, { "epoch": 2.0, "grad_norm": 7.509830474853516, "learning_rate": 1.6714655172413794e-06, "loss": 0.1349, "step": 203150 }, { "epoch": 2.0, "grad_norm": 5.170925140380859, "learning_rate": 1.6710344827586208e-06, "loss": 0.2359, "step": 203175 }, { "epoch": 2.0, "grad_norm": 2.8909716606140137, "learning_rate": 1.670603448275862e-06, "loss": 0.1457, "step": 203200 }, { "epoch": 2.0, "grad_norm": 5.991971492767334, "learning_rate": 1.6701724137931035e-06, "loss": 0.3128, "step": 203225 }, { "epoch": 2.0, "grad_norm": 2.188373327255249, "learning_rate": 1.669741379310345e-06, "loss": 0.1501, "step": 203250 }, { "epoch": 2.0, "grad_norm": 4.664975166320801, "learning_rate": 1.6693103448275862e-06, "loss": 0.283, "step": 203275 }, { "epoch": 2.0, "grad_norm": 8.920903205871582, "learning_rate": 1.6688793103448279e-06, "loss": 0.1624, "step": 203300 }, { "epoch": 2.0, "grad_norm": 4.30737829208374, "learning_rate": 1.6684482758620691e-06, "loss": 0.2768, "step": 203325 }, { "epoch": 2.0, "grad_norm": 3.8554775714874268, "learning_rate": 1.6680172413793106e-06, "loss": 0.1614, "step": 203350 }, { "epoch": 2.0, "grad_norm": 11.975000381469727, "learning_rate": 1.6675862068965518e-06, "loss": 0.1046, "step": 203375 }, { "epoch": 2.0, "grad_norm": 2.842888593673706, "learning_rate": 1.6671551724137933e-06, "loss": 0.1864, "step": 203400 }, { "epoch": 2.0, "grad_norm": 4.997286796569824, "learning_rate": 1.6667241379310345e-06, "loss": 0.0912, "step": 203425 }, { "epoch": 2.0, "grad_norm": 5.941993236541748, "learning_rate": 1.666293103448276e-06, "loss": 0.2137, "step": 203450 }, { "epoch": 2.0, "grad_norm": 3.775590419769287, "learning_rate": 1.6658620689655175e-06, "loss": 0.1036, "step": 203475 }, { "epoch": 2.0, "grad_norm": 4.232646465301514, "learning_rate": 1.6654310344827587e-06, "loss": 0.2186, "step": 203500 }, { "epoch": 2.0, "grad_norm": 5.168502330780029, "learning_rate": 1.6650000000000002e-06, "loss": 0.0826, "step": 203525 }, { "epoch": 2.0, "grad_norm": 3.805983543395996, "learning_rate": 1.6645689655172414e-06, "loss": 0.2001, "step": 203550 }, { "epoch": 2.0, "grad_norm": 2.315549850463867, "learning_rate": 1.6641379310344829e-06, "loss": 0.0937, "step": 203575 }, { "epoch": 2.0, "grad_norm": 5.022488594055176, "learning_rate": 1.663706896551724e-06, "loss": 0.2287, "step": 203600 }, { "epoch": 2.0, "grad_norm": 6.419956684112549, "learning_rate": 1.6632758620689658e-06, "loss": 0.1008, "step": 203625 }, { "epoch": 2.0, "grad_norm": 3.9084155559539795, "learning_rate": 1.662844827586207e-06, "loss": 0.1765, "step": 203650 }, { "epoch": 2.0, "grad_norm": 10.498169898986816, "learning_rate": 1.6624137931034485e-06, "loss": 0.1163, "step": 203675 }, { "epoch": 2.0, "grad_norm": 4.4593658447265625, "learning_rate": 1.66198275862069e-06, "loss": 0.2252, "step": 203700 }, { "epoch": 2.0, "grad_norm": 10.084554672241211, "learning_rate": 1.6615517241379312e-06, "loss": 0.0906, "step": 203725 }, { "epoch": 2.0, "grad_norm": 5.303679466247559, "learning_rate": 1.6611206896551726e-06, "loss": 0.2177, "step": 203750 }, { "epoch": 2.0, "grad_norm": 7.303496360778809, "learning_rate": 1.6606896551724139e-06, "loss": 0.0846, "step": 203775 }, { "epoch": 2.0, "grad_norm": 4.958081245422363, "learning_rate": 1.6602586206896553e-06, "loss": 0.1908, "step": 203800 }, { "epoch": 2.0, "grad_norm": 9.61656665802002, "learning_rate": 1.6598275862068966e-06, "loss": 0.0967, "step": 203825 }, { "epoch": 2.0, "grad_norm": 4.2739715576171875, "learning_rate": 1.659396551724138e-06, "loss": 0.1883, "step": 203850 }, { "epoch": 2.01, "grad_norm": 7.839179992675781, "learning_rate": 1.6589655172413793e-06, "loss": 0.1089, "step": 203875 }, { "epoch": 2.01, "grad_norm": 5.237013816833496, "learning_rate": 1.6585344827586207e-06, "loss": 0.2069, "step": 203900 }, { "epoch": 2.01, "grad_norm": 8.12621784210205, "learning_rate": 1.6581034482758624e-06, "loss": 0.1092, "step": 203925 }, { "epoch": 2.01, "grad_norm": 4.524117946624756, "learning_rate": 1.6576724137931036e-06, "loss": 0.2284, "step": 203950 }, { "epoch": 2.01, "grad_norm": 6.5497212409973145, "learning_rate": 1.657241379310345e-06, "loss": 0.1244, "step": 203975 }, { "epoch": 2.01, "grad_norm": 4.005838871002197, "learning_rate": 1.6568103448275863e-06, "loss": 0.2135, "step": 204000 }, { "epoch": 2.01, "grad_norm": 8.158697128295898, "learning_rate": 1.6563793103448278e-06, "loss": 0.069, "step": 204025 }, { "epoch": 2.01, "grad_norm": 5.192374229431152, "learning_rate": 1.655948275862069e-06, "loss": 0.1745, "step": 204050 }, { "epoch": 2.01, "grad_norm": 9.711862564086914, "learning_rate": 1.6555172413793105e-06, "loss": 0.0667, "step": 204075 }, { "epoch": 2.01, "grad_norm": 4.785118579864502, "learning_rate": 1.6550862068965517e-06, "loss": 0.2115, "step": 204100 }, { "epoch": 2.01, "grad_norm": 4.6942138671875, "learning_rate": 1.6546551724137932e-06, "loss": 0.0961, "step": 204125 }, { "epoch": 2.01, "grad_norm": 4.269994258880615, "learning_rate": 1.6542241379310344e-06, "loss": 0.1793, "step": 204150 }, { "epoch": 2.01, "grad_norm": 13.155963897705078, "learning_rate": 1.653793103448276e-06, "loss": 0.1016, "step": 204175 }, { "epoch": 2.01, "grad_norm": 5.175486087799072, "learning_rate": 1.6533620689655176e-06, "loss": 0.2117, "step": 204200 }, { "epoch": 2.01, "grad_norm": 5.967070579528809, "learning_rate": 1.6529310344827588e-06, "loss": 0.1157, "step": 204225 }, { "epoch": 2.01, "grad_norm": 4.660403728485107, "learning_rate": 1.6525000000000003e-06, "loss": 0.171, "step": 204250 }, { "epoch": 2.01, "grad_norm": 5.973852634429932, "learning_rate": 1.6520689655172415e-06, "loss": 0.0992, "step": 204275 }, { "epoch": 2.01, "grad_norm": 4.19060754776001, "learning_rate": 1.651637931034483e-06, "loss": 0.2195, "step": 204300 }, { "epoch": 2.01, "grad_norm": 11.211939811706543, "learning_rate": 1.6512068965517242e-06, "loss": 0.128, "step": 204325 }, { "epoch": 2.01, "grad_norm": 4.135138988494873, "learning_rate": 1.650793103448276e-06, "loss": 0.2274, "step": 204350 }, { "epoch": 2.01, "grad_norm": 8.122978210449219, "learning_rate": 1.6503620689655173e-06, "loss": 0.0956, "step": 204375 }, { "epoch": 2.01, "grad_norm": 4.28475284576416, "learning_rate": 1.6499310344827588e-06, "loss": 0.2046, "step": 204400 }, { "epoch": 2.01, "grad_norm": 5.161162853240967, "learning_rate": 1.6495000000000003e-06, "loss": 0.0834, "step": 204425 }, { "epoch": 2.01, "grad_norm": 3.9565064907073975, "learning_rate": 1.6490689655172415e-06, "loss": 0.1836, "step": 204450 }, { "epoch": 2.01, "grad_norm": 11.335373878479004, "learning_rate": 1.648637931034483e-06, "loss": 0.0574, "step": 204475 }, { "epoch": 2.01, "grad_norm": 4.036355018615723, "learning_rate": 1.6482068965517242e-06, "loss": 0.2236, "step": 204500 }, { "epoch": 2.01, "grad_norm": 3.5706918239593506, "learning_rate": 1.6477758620689657e-06, "loss": 0.105, "step": 204525 }, { "epoch": 2.01, "grad_norm": 5.05307674407959, "learning_rate": 1.647344827586207e-06, "loss": 0.1811, "step": 204550 }, { "epoch": 2.01, "grad_norm": 4.923802852630615, "learning_rate": 1.6469137931034484e-06, "loss": 0.0804, "step": 204575 }, { "epoch": 2.01, "grad_norm": 3.002408027648926, "learning_rate": 1.6464827586206896e-06, "loss": 0.1908, "step": 204600 }, { "epoch": 2.01, "grad_norm": 6.112510681152344, "learning_rate": 1.6460517241379313e-06, "loss": 0.0784, "step": 204625 }, { "epoch": 2.01, "grad_norm": 2.768955945968628, "learning_rate": 1.6456206896551723e-06, "loss": 0.1966, "step": 204650 }, { "epoch": 2.01, "grad_norm": 6.22660493850708, "learning_rate": 1.645189655172414e-06, "loss": 0.0674, "step": 204675 }, { "epoch": 2.01, "grad_norm": 3.420344352722168, "learning_rate": 1.6447586206896554e-06, "loss": 0.2086, "step": 204700 }, { "epoch": 2.01, "grad_norm": 9.290632247924805, "learning_rate": 1.6443275862068967e-06, "loss": 0.0983, "step": 204725 }, { "epoch": 2.01, "grad_norm": 3.424551248550415, "learning_rate": 1.6438965517241381e-06, "loss": 0.2297, "step": 204750 }, { "epoch": 2.01, "grad_norm": 7.2936787605285645, "learning_rate": 1.6434655172413794e-06, "loss": 0.1185, "step": 204775 }, { "epoch": 2.01, "grad_norm": 4.334712028503418, "learning_rate": 1.6430344827586208e-06, "loss": 0.1843, "step": 204800 }, { "epoch": 2.01, "grad_norm": 19.528249740600586, "learning_rate": 1.642603448275862e-06, "loss": 0.0914, "step": 204825 }, { "epoch": 2.01, "grad_norm": 4.127599239349365, "learning_rate": 1.6421724137931035e-06, "loss": 0.1613, "step": 204850 }, { "epoch": 2.02, "grad_norm": 10.686935424804688, "learning_rate": 1.6417413793103448e-06, "loss": 0.1324, "step": 204875 }, { "epoch": 2.02, "grad_norm": 4.237809658050537, "learning_rate": 1.6413103448275862e-06, "loss": 0.1848, "step": 204900 }, { "epoch": 2.02, "grad_norm": 6.170703411102295, "learning_rate": 1.640879310344828e-06, "loss": 0.0834, "step": 204925 }, { "epoch": 2.02, "grad_norm": 3.9383044242858887, "learning_rate": 1.6404482758620691e-06, "loss": 0.1841, "step": 204950 }, { "epoch": 2.02, "grad_norm": 7.1617751121521, "learning_rate": 1.6400172413793106e-06, "loss": 0.0718, "step": 204975 }, { "epoch": 2.02, "grad_norm": 3.4936721324920654, "learning_rate": 1.6395862068965519e-06, "loss": 0.1935, "step": 205000 }, { "epoch": 2.02, "grad_norm": 11.402009010314941, "learning_rate": 1.6391551724137933e-06, "loss": 0.0958, "step": 205025 }, { "epoch": 2.02, "grad_norm": 5.06321907043457, "learning_rate": 1.6387241379310346e-06, "loss": 0.2692, "step": 205050 }, { "epoch": 2.02, "grad_norm": 3.2286078929901123, "learning_rate": 1.638293103448276e-06, "loss": 0.0838, "step": 205075 }, { "epoch": 2.02, "grad_norm": 5.225040435791016, "learning_rate": 1.6378620689655173e-06, "loss": 0.2078, "step": 205100 }, { "epoch": 2.02, "grad_norm": 25.393627166748047, "learning_rate": 1.6374310344827587e-06, "loss": 0.1066, "step": 205125 }, { "epoch": 2.02, "grad_norm": 4.289276123046875, "learning_rate": 1.6370000000000002e-06, "loss": 0.1984, "step": 205150 }, { "epoch": 2.02, "grad_norm": 6.523091793060303, "learning_rate": 1.6365689655172414e-06, "loss": 0.0798, "step": 205175 }, { "epoch": 2.02, "grad_norm": 6.188960552215576, "learning_rate": 1.636137931034483e-06, "loss": 0.1587, "step": 205200 }, { "epoch": 2.02, "grad_norm": 7.576614856719971, "learning_rate": 1.6357068965517241e-06, "loss": 0.103, "step": 205225 }, { "epoch": 2.02, "grad_norm": 6.2431230545043945, "learning_rate": 1.6352758620689658e-06, "loss": 0.1657, "step": 205250 }, { "epoch": 2.02, "grad_norm": 8.110132217407227, "learning_rate": 1.634844827586207e-06, "loss": 0.0933, "step": 205275 }, { "epoch": 2.02, "grad_norm": 3.877791404724121, "learning_rate": 1.6344137931034485e-06, "loss": 0.197, "step": 205300 }, { "epoch": 2.02, "grad_norm": 6.901827335357666, "learning_rate": 1.6339827586206897e-06, "loss": 0.0944, "step": 205325 }, { "epoch": 2.02, "grad_norm": 5.293367862701416, "learning_rate": 1.6335517241379312e-06, "loss": 0.2058, "step": 205350 }, { "epoch": 2.02, "grad_norm": 12.101127624511719, "learning_rate": 1.6331206896551726e-06, "loss": 0.1012, "step": 205375 }, { "epoch": 2.02, "grad_norm": 4.031815528869629, "learning_rate": 1.6326896551724139e-06, "loss": 0.2485, "step": 205400 }, { "epoch": 2.02, "grad_norm": 7.458622932434082, "learning_rate": 1.6322586206896553e-06, "loss": 0.087, "step": 205425 }, { "epoch": 2.02, "grad_norm": 3.9872827529907227, "learning_rate": 1.6318275862068966e-06, "loss": 0.1673, "step": 205450 }, { "epoch": 2.02, "grad_norm": 1.976589560508728, "learning_rate": 1.631396551724138e-06, "loss": 0.0872, "step": 205475 }, { "epoch": 2.02, "grad_norm": 3.814929962158203, "learning_rate": 1.6309655172413793e-06, "loss": 0.2745, "step": 205500 }, { "epoch": 2.02, "grad_norm": 7.864455699920654, "learning_rate": 1.630534482758621e-06, "loss": 0.1061, "step": 205525 }, { "epoch": 2.02, "grad_norm": 4.061902046203613, "learning_rate": 1.630103448275862e-06, "loss": 0.2056, "step": 205550 }, { "epoch": 2.02, "grad_norm": 4.097835540771484, "learning_rate": 1.6296724137931037e-06, "loss": 0.0558, "step": 205575 }, { "epoch": 2.02, "grad_norm": 5.319177627563477, "learning_rate": 1.6292413793103451e-06, "loss": 0.2522, "step": 205600 }, { "epoch": 2.02, "grad_norm": 8.807332992553711, "learning_rate": 1.6288103448275864e-06, "loss": 0.0825, "step": 205625 }, { "epoch": 2.02, "grad_norm": 4.1007490158081055, "learning_rate": 1.6283793103448278e-06, "loss": 0.202, "step": 205650 }, { "epoch": 2.02, "grad_norm": 2.443835973739624, "learning_rate": 1.627948275862069e-06, "loss": 0.096, "step": 205675 }, { "epoch": 2.02, "grad_norm": 4.091855525970459, "learning_rate": 1.6275172413793105e-06, "loss": 0.2005, "step": 205700 }, { "epoch": 2.02, "grad_norm": 8.753847122192383, "learning_rate": 1.6270862068965518e-06, "loss": 0.0879, "step": 205725 }, { "epoch": 2.02, "grad_norm": 4.520854949951172, "learning_rate": 1.6266551724137932e-06, "loss": 0.193, "step": 205750 }, { "epoch": 2.02, "grad_norm": 11.260403633117676, "learning_rate": 1.6262241379310345e-06, "loss": 0.0882, "step": 205775 }, { "epoch": 2.02, "grad_norm": 3.840649127960205, "learning_rate": 1.625793103448276e-06, "loss": 0.2241, "step": 205800 }, { "epoch": 2.02, "grad_norm": 6.285272598266602, "learning_rate": 1.6253620689655172e-06, "loss": 0.1293, "step": 205825 }, { "epoch": 2.02, "grad_norm": 3.7476959228515625, "learning_rate": 1.6249310344827588e-06, "loss": 0.158, "step": 205850 }, { "epoch": 2.02, "grad_norm": 10.008858680725098, "learning_rate": 1.6245000000000003e-06, "loss": 0.0997, "step": 205875 }, { "epoch": 2.03, "grad_norm": 4.830460071563721, "learning_rate": 1.6240689655172415e-06, "loss": 0.2292, "step": 205900 }, { "epoch": 2.03, "grad_norm": 7.025946140289307, "learning_rate": 1.623637931034483e-06, "loss": 0.0955, "step": 205925 }, { "epoch": 2.03, "grad_norm": 4.683816909790039, "learning_rate": 1.6232068965517242e-06, "loss": 0.2389, "step": 205950 }, { "epoch": 2.03, "grad_norm": 0.1963234841823578, "learning_rate": 1.6227758620689657e-06, "loss": 0.0823, "step": 205975 }, { "epoch": 2.03, "grad_norm": 5.0199360847473145, "learning_rate": 1.622344827586207e-06, "loss": 0.1788, "step": 206000 }, { "epoch": 2.03, "grad_norm": 2.1027421951293945, "learning_rate": 1.6219137931034484e-06, "loss": 0.0936, "step": 206025 }, { "epoch": 2.03, "grad_norm": 3.4507181644439697, "learning_rate": 1.6214827586206896e-06, "loss": 0.1671, "step": 206050 }, { "epoch": 2.03, "grad_norm": 6.067552089691162, "learning_rate": 1.621051724137931e-06, "loss": 0.0724, "step": 206075 }, { "epoch": 2.03, "grad_norm": 3.297032117843628, "learning_rate": 1.6206206896551728e-06, "loss": 0.2055, "step": 206100 }, { "epoch": 2.03, "grad_norm": 8.625247955322266, "learning_rate": 1.6201896551724138e-06, "loss": 0.0798, "step": 206125 }, { "epoch": 2.03, "grad_norm": 4.2229413986206055, "learning_rate": 1.6197586206896555e-06, "loss": 0.1637, "step": 206150 }, { "epoch": 2.03, "grad_norm": 10.626544952392578, "learning_rate": 1.6193275862068967e-06, "loss": 0.1016, "step": 206175 }, { "epoch": 2.03, "grad_norm": 3.120094060897827, "learning_rate": 1.6188965517241382e-06, "loss": 0.2002, "step": 206200 }, { "epoch": 2.03, "grad_norm": 7.943653583526611, "learning_rate": 1.6184655172413794e-06, "loss": 0.0956, "step": 206225 }, { "epoch": 2.03, "grad_norm": 4.862173557281494, "learning_rate": 1.6180344827586209e-06, "loss": 0.2017, "step": 206250 }, { "epoch": 2.03, "grad_norm": 10.294102668762207, "learning_rate": 1.6176034482758621e-06, "loss": 0.091, "step": 206275 }, { "epoch": 2.03, "grad_norm": 3.6006643772125244, "learning_rate": 1.6171724137931036e-06, "loss": 0.1716, "step": 206300 }, { "epoch": 2.03, "grad_norm": 10.761106491088867, "learning_rate": 1.616741379310345e-06, "loss": 0.0948, "step": 206325 }, { "epoch": 2.03, "grad_norm": 4.156013488769531, "learning_rate": 1.6163103448275863e-06, "loss": 0.1766, "step": 206350 }, { "epoch": 2.03, "grad_norm": 6.936644554138184, "learning_rate": 1.6158793103448277e-06, "loss": 0.1042, "step": 206375 }, { "epoch": 2.03, "grad_norm": 3.672916889190674, "learning_rate": 1.615448275862069e-06, "loss": 0.2273, "step": 206400 }, { "epoch": 2.03, "grad_norm": 4.883218288421631, "learning_rate": 1.6150172413793106e-06, "loss": 0.1026, "step": 206425 }, { "epoch": 2.03, "grad_norm": 4.532534599304199, "learning_rate": 1.6145862068965517e-06, "loss": 0.166, "step": 206450 }, { "epoch": 2.03, "grad_norm": 13.765883445739746, "learning_rate": 1.6141551724137933e-06, "loss": 0.0941, "step": 206475 }, { "epoch": 2.03, "grad_norm": 5.8282694816589355, "learning_rate": 1.6137241379310346e-06, "loss": 0.185, "step": 206500 }, { "epoch": 2.03, "grad_norm": 9.856562614440918, "learning_rate": 1.613293103448276e-06, "loss": 0.0912, "step": 206525 }, { "epoch": 2.03, "grad_norm": 5.002504825592041, "learning_rate": 1.6128793103448275e-06, "loss": 0.212, "step": 206550 }, { "epoch": 2.03, "grad_norm": 9.786355972290039, "learning_rate": 1.6124482758620692e-06, "loss": 0.13, "step": 206575 }, { "epoch": 2.03, "grad_norm": 3.5867650508880615, "learning_rate": 1.6120172413793106e-06, "loss": 0.2362, "step": 206600 }, { "epoch": 2.03, "grad_norm": 7.480104923248291, "learning_rate": 1.6115862068965519e-06, "loss": 0.0873, "step": 206625 }, { "epoch": 2.03, "grad_norm": 4.203890323638916, "learning_rate": 1.6111551724137933e-06, "loss": 0.1771, "step": 206650 }, { "epoch": 2.03, "grad_norm": 10.284107208251953, "learning_rate": 1.6107241379310346e-06, "loss": 0.0888, "step": 206675 }, { "epoch": 2.03, "grad_norm": 19.002344131469727, "learning_rate": 1.610293103448276e-06, "loss": 0.2596, "step": 206700 }, { "epoch": 2.03, "grad_norm": 11.621265411376953, "learning_rate": 1.6098620689655173e-06, "loss": 0.1117, "step": 206725 }, { "epoch": 2.03, "grad_norm": 5.127709865570068, "learning_rate": 1.6094310344827587e-06, "loss": 0.1888, "step": 206750 }, { "epoch": 2.03, "grad_norm": 3.3708128929138184, "learning_rate": 1.609e-06, "loss": 0.0796, "step": 206775 }, { "epoch": 2.03, "grad_norm": 4.982000350952148, "learning_rate": 1.6085689655172414e-06, "loss": 0.2279, "step": 206800 }, { "epoch": 2.03, "grad_norm": 5.419832706451416, "learning_rate": 1.608137931034483e-06, "loss": 0.0741, "step": 206825 }, { "epoch": 2.03, "grad_norm": 3.7456166744232178, "learning_rate": 1.6077068965517243e-06, "loss": 0.2101, "step": 206850 }, { "epoch": 2.03, "grad_norm": 7.532432556152344, "learning_rate": 1.6072758620689658e-06, "loss": 0.0883, "step": 206875 }, { "epoch": 2.03, "grad_norm": 4.297346591949463, "learning_rate": 1.606844827586207e-06, "loss": 0.1822, "step": 206900 }, { "epoch": 2.04, "grad_norm": 9.202104568481445, "learning_rate": 1.6064137931034485e-06, "loss": 0.0902, "step": 206925 }, { "epoch": 2.04, "grad_norm": 4.028449058532715, "learning_rate": 1.6059827586206897e-06, "loss": 0.2064, "step": 206950 }, { "epoch": 2.04, "grad_norm": 11.408668518066406, "learning_rate": 1.6055517241379312e-06, "loss": 0.096, "step": 206975 }, { "epoch": 2.04, "grad_norm": 3.7707810401916504, "learning_rate": 1.6051206896551724e-06, "loss": 0.1818, "step": 207000 }, { "epoch": 2.04, "grad_norm": 5.272019386291504, "learning_rate": 1.604689655172414e-06, "loss": 0.0768, "step": 207025 }, { "epoch": 2.04, "grad_norm": 4.663258075714111, "learning_rate": 1.6042586206896554e-06, "loss": 0.2152, "step": 207050 }, { "epoch": 2.04, "grad_norm": 7.456428050994873, "learning_rate": 1.6038275862068966e-06, "loss": 0.0849, "step": 207075 }, { "epoch": 2.04, "grad_norm": 2.991079330444336, "learning_rate": 1.6033965517241383e-06, "loss": 0.2293, "step": 207100 }, { "epoch": 2.04, "grad_norm": 22.672651290893555, "learning_rate": 1.6029655172413793e-06, "loss": 0.0895, "step": 207125 }, { "epoch": 2.04, "grad_norm": 5.070868492126465, "learning_rate": 1.602534482758621e-06, "loss": 0.2206, "step": 207150 }, { "epoch": 2.04, "grad_norm": 7.5417022705078125, "learning_rate": 1.6021034482758622e-06, "loss": 0.1079, "step": 207175 }, { "epoch": 2.04, "grad_norm": 9.325801849365234, "learning_rate": 1.6016724137931037e-06, "loss": 0.1402, "step": 207200 }, { "epoch": 2.04, "grad_norm": 6.56144905090332, "learning_rate": 1.601241379310345e-06, "loss": 0.0934, "step": 207225 }, { "epoch": 2.04, "grad_norm": 3.9495038986206055, "learning_rate": 1.6008103448275864e-06, "loss": 0.2034, "step": 207250 }, { "epoch": 2.04, "grad_norm": 7.990026950836182, "learning_rate": 1.6003793103448278e-06, "loss": 0.1131, "step": 207275 }, { "epoch": 2.04, "grad_norm": 5.487949848175049, "learning_rate": 1.599948275862069e-06, "loss": 0.1937, "step": 207300 }, { "epoch": 2.04, "grad_norm": 8.349239349365234, "learning_rate": 1.5995172413793105e-06, "loss": 0.0873, "step": 207325 }, { "epoch": 2.04, "grad_norm": 4.543038368225098, "learning_rate": 1.5990862068965518e-06, "loss": 0.1622, "step": 207350 }, { "epoch": 2.04, "grad_norm": 5.069793701171875, "learning_rate": 1.5986551724137932e-06, "loss": 0.0992, "step": 207375 }, { "epoch": 2.04, "grad_norm": 3.673417329788208, "learning_rate": 1.5982241379310345e-06, "loss": 0.238, "step": 207400 }, { "epoch": 2.04, "grad_norm": 10.333695411682129, "learning_rate": 1.5977931034482761e-06, "loss": 0.1004, "step": 207425 }, { "epoch": 2.04, "grad_norm": 4.604072093963623, "learning_rate": 1.5973620689655172e-06, "loss": 0.2037, "step": 207450 }, { "epoch": 2.04, "grad_norm": 12.783795356750488, "learning_rate": 1.5969310344827588e-06, "loss": 0.1139, "step": 207475 }, { "epoch": 2.04, "grad_norm": 3.065124034881592, "learning_rate": 1.5965e-06, "loss": 0.2055, "step": 207500 }, { "epoch": 2.04, "grad_norm": 6.515017986297607, "learning_rate": 1.5960689655172415e-06, "loss": 0.0979, "step": 207525 }, { "epoch": 2.04, "grad_norm": 6.282369613647461, "learning_rate": 1.595637931034483e-06, "loss": 0.2132, "step": 207550 }, { "epoch": 2.04, "grad_norm": 5.633284568786621, "learning_rate": 1.5952068965517242e-06, "loss": 0.0966, "step": 207575 }, { "epoch": 2.04, "grad_norm": 4.276591777801514, "learning_rate": 1.5947758620689657e-06, "loss": 0.1779, "step": 207600 }, { "epoch": 2.04, "grad_norm": 9.768033981323242, "learning_rate": 1.594344827586207e-06, "loss": 0.0723, "step": 207625 }, { "epoch": 2.04, "grad_norm": 3.540529251098633, "learning_rate": 1.5939137931034484e-06, "loss": 0.1875, "step": 207650 }, { "epoch": 2.04, "grad_norm": 6.954980850219727, "learning_rate": 1.5934827586206896e-06, "loss": 0.0917, "step": 207675 }, { "epoch": 2.04, "grad_norm": 3.990705966949463, "learning_rate": 1.593051724137931e-06, "loss": 0.2298, "step": 207700 }, { "epoch": 2.04, "grad_norm": 8.276272773742676, "learning_rate": 1.5926206896551723e-06, "loss": 0.0825, "step": 207725 }, { "epoch": 2.04, "grad_norm": 4.132732391357422, "learning_rate": 1.592189655172414e-06, "loss": 0.204, "step": 207750 }, { "epoch": 2.04, "grad_norm": 2.9219398498535156, "learning_rate": 1.5917586206896555e-06, "loss": 0.0953, "step": 207775 }, { "epoch": 2.04, "grad_norm": 4.6448655128479, "learning_rate": 1.5913275862068967e-06, "loss": 0.1807, "step": 207800 }, { "epoch": 2.04, "grad_norm": 8.888419151306152, "learning_rate": 1.5908965517241382e-06, "loss": 0.0806, "step": 207825 }, { "epoch": 2.04, "grad_norm": 4.495108604431152, "learning_rate": 1.5904655172413794e-06, "loss": 0.2075, "step": 207850 }, { "epoch": 2.04, "grad_norm": 13.5252685546875, "learning_rate": 1.5900344827586209e-06, "loss": 0.079, "step": 207875 }, { "epoch": 2.04, "grad_norm": 3.724276065826416, "learning_rate": 1.5896034482758621e-06, "loss": 0.1893, "step": 207900 }, { "epoch": 2.05, "grad_norm": 3.1361351013183594, "learning_rate": 1.5891724137931036e-06, "loss": 0.0983, "step": 207925 }, { "epoch": 2.05, "grad_norm": 3.4891576766967773, "learning_rate": 1.5887413793103448e-06, "loss": 0.2602, "step": 207950 }, { "epoch": 2.05, "grad_norm": 10.091974258422852, "learning_rate": 1.5883103448275863e-06, "loss": 0.0814, "step": 207975 }, { "epoch": 2.05, "grad_norm": 5.3357439041137695, "learning_rate": 1.5878793103448277e-06, "loss": 0.1928, "step": 208000 }, { "epoch": 2.05, "grad_norm": 4.336633682250977, "learning_rate": 1.587448275862069e-06, "loss": 0.1096, "step": 208025 }, { "epoch": 2.05, "grad_norm": 3.146547555923462, "learning_rate": 1.5870172413793106e-06, "loss": 0.1966, "step": 208050 }, { "epoch": 2.05, "grad_norm": 6.709781646728516, "learning_rate": 1.5865862068965519e-06, "loss": 0.1132, "step": 208075 }, { "epoch": 2.05, "grad_norm": 4.435120105743408, "learning_rate": 1.5861551724137933e-06, "loss": 0.2014, "step": 208100 }, { "epoch": 2.05, "grad_norm": 9.434395790100098, "learning_rate": 1.5857241379310346e-06, "loss": 0.0894, "step": 208125 }, { "epoch": 2.05, "grad_norm": 4.339469909667969, "learning_rate": 1.585293103448276e-06, "loss": 0.1719, "step": 208150 }, { "epoch": 2.05, "grad_norm": 3.9315359592437744, "learning_rate": 1.5848620689655173e-06, "loss": 0.1017, "step": 208175 }, { "epoch": 2.05, "grad_norm": 4.552648544311523, "learning_rate": 1.5844310344827588e-06, "loss": 0.1968, "step": 208200 }, { "epoch": 2.05, "grad_norm": 7.58515739440918, "learning_rate": 1.5840000000000002e-06, "loss": 0.0988, "step": 208225 }, { "epoch": 2.05, "grad_norm": 4.524840831756592, "learning_rate": 1.5835689655172415e-06, "loss": 0.1848, "step": 208250 }, { "epoch": 2.05, "grad_norm": 3.2762393951416016, "learning_rate": 1.583137931034483e-06, "loss": 0.0873, "step": 208275 }, { "epoch": 2.05, "grad_norm": 4.212737083435059, "learning_rate": 1.5827068965517242e-06, "loss": 0.1865, "step": 208300 }, { "epoch": 2.05, "grad_norm": 7.5344977378845215, "learning_rate": 1.5822758620689658e-06, "loss": 0.0823, "step": 208325 }, { "epoch": 2.05, "grad_norm": 2.972705125808716, "learning_rate": 1.5818448275862069e-06, "loss": 0.189, "step": 208350 }, { "epoch": 2.05, "grad_norm": 11.010865211486816, "learning_rate": 1.5814137931034485e-06, "loss": 0.0827, "step": 208375 }, { "epoch": 2.05, "grad_norm": 7.018374919891357, "learning_rate": 1.5809827586206898e-06, "loss": 0.2494, "step": 208400 }, { "epoch": 2.05, "grad_norm": 5.747298240661621, "learning_rate": 1.5805517241379312e-06, "loss": 0.0822, "step": 208425 }, { "epoch": 2.05, "grad_norm": 5.918389320373535, "learning_rate": 1.5801206896551727e-06, "loss": 0.2531, "step": 208450 }, { "epoch": 2.05, "grad_norm": 5.539153575897217, "learning_rate": 1.579689655172414e-06, "loss": 0.0888, "step": 208475 }, { "epoch": 2.05, "grad_norm": 4.0143022537231445, "learning_rate": 1.5792586206896554e-06, "loss": 0.2092, "step": 208500 }, { "epoch": 2.05, "grad_norm": 10.25486946105957, "learning_rate": 1.5788275862068966e-06, "loss": 0.1051, "step": 208525 }, { "epoch": 2.05, "grad_norm": 4.081014633178711, "learning_rate": 1.578396551724138e-06, "loss": 0.2078, "step": 208550 }, { "epoch": 2.05, "grad_norm": 11.69734001159668, "learning_rate": 1.5779655172413793e-06, "loss": 0.0993, "step": 208575 }, { "epoch": 2.05, "grad_norm": 6.031224727630615, "learning_rate": 1.5775517241379312e-06, "loss": 0.1951, "step": 208600 }, { "epoch": 2.05, "grad_norm": 9.600008010864258, "learning_rate": 1.5771206896551725e-06, "loss": 0.1078, "step": 208625 }, { "epoch": 2.05, "grad_norm": 3.7903459072113037, "learning_rate": 1.576689655172414e-06, "loss": 0.1876, "step": 208650 }, { "epoch": 2.05, "grad_norm": 1.4776735305786133, "learning_rate": 1.5762586206896552e-06, "loss": 0.0665, "step": 208675 }, { "epoch": 2.05, "grad_norm": 3.5261945724487305, "learning_rate": 1.5758275862068966e-06, "loss": 0.219, "step": 208700 }, { "epoch": 2.05, "grad_norm": 5.186706066131592, "learning_rate": 1.5753965517241383e-06, "loss": 0.0867, "step": 208725 }, { "epoch": 2.05, "grad_norm": 5.79729700088501, "learning_rate": 1.5749655172413793e-06, "loss": 0.1842, "step": 208750 }, { "epoch": 2.05, "grad_norm": 7.5387091636657715, "learning_rate": 1.574534482758621e-06, "loss": 0.0947, "step": 208775 }, { "epoch": 2.05, "grad_norm": 4.880837917327881, "learning_rate": 1.5741034482758622e-06, "loss": 0.2246, "step": 208800 }, { "epoch": 2.05, "grad_norm": 6.2518839836120605, "learning_rate": 1.5736724137931037e-06, "loss": 0.1006, "step": 208825 }, { "epoch": 2.05, "grad_norm": 3.885031223297119, "learning_rate": 1.573241379310345e-06, "loss": 0.1984, "step": 208850 }, { "epoch": 2.05, "grad_norm": 3.780642509460449, "learning_rate": 1.5728103448275864e-06, "loss": 0.0765, "step": 208875 }, { "epoch": 2.05, "grad_norm": 3.6753909587860107, "learning_rate": 1.5723793103448276e-06, "loss": 0.1755, "step": 208900 }, { "epoch": 2.05, "grad_norm": 5.878759384155273, "learning_rate": 1.571948275862069e-06, "loss": 0.0703, "step": 208925 }, { "epoch": 2.06, "grad_norm": 4.988179683685303, "learning_rate": 1.5715172413793105e-06, "loss": 0.2466, "step": 208950 }, { "epoch": 2.06, "grad_norm": 5.267677307128906, "learning_rate": 1.5710862068965518e-06, "loss": 0.0996, "step": 208975 }, { "epoch": 2.06, "grad_norm": 4.368093967437744, "learning_rate": 1.5706551724137932e-06, "loss": 0.1812, "step": 209000 }, { "epoch": 2.06, "grad_norm": 8.214359283447266, "learning_rate": 1.5702241379310345e-06, "loss": 0.1169, "step": 209025 }, { "epoch": 2.06, "grad_norm": 6.099796295166016, "learning_rate": 1.5697931034482762e-06, "loss": 0.2118, "step": 209050 }, { "epoch": 2.06, "grad_norm": 3.324183225631714, "learning_rate": 1.5693620689655172e-06, "loss": 0.0746, "step": 209075 }, { "epoch": 2.06, "grad_norm": 5.944307327270508, "learning_rate": 1.5689310344827589e-06, "loss": 0.1874, "step": 209100 }, { "epoch": 2.06, "grad_norm": 2.596550703048706, "learning_rate": 1.5685e-06, "loss": 0.0638, "step": 209125 }, { "epoch": 2.06, "grad_norm": 5.014053821563721, "learning_rate": 1.5680689655172416e-06, "loss": 0.1998, "step": 209150 }, { "epoch": 2.06, "grad_norm": 6.201009273529053, "learning_rate": 1.5676379310344828e-06, "loss": 0.0868, "step": 209175 }, { "epoch": 2.06, "grad_norm": 4.824102401733398, "learning_rate": 1.5672068965517243e-06, "loss": 0.2208, "step": 209200 }, { "epoch": 2.06, "grad_norm": 7.535545349121094, "learning_rate": 1.5667758620689657e-06, "loss": 0.1337, "step": 209225 }, { "epoch": 2.06, "grad_norm": 3.3413519859313965, "learning_rate": 1.566344827586207e-06, "loss": 0.2062, "step": 209250 }, { "epoch": 2.06, "grad_norm": 9.237353324890137, "learning_rate": 1.5659137931034484e-06, "loss": 0.1018, "step": 209275 }, { "epoch": 2.06, "grad_norm": 6.442529201507568, "learning_rate": 1.5654827586206897e-06, "loss": 0.1527, "step": 209300 }, { "epoch": 2.06, "grad_norm": 11.056655883789062, "learning_rate": 1.5650517241379311e-06, "loss": 0.0945, "step": 209325 }, { "epoch": 2.06, "grad_norm": 4.932990550994873, "learning_rate": 1.5646206896551724e-06, "loss": 0.2493, "step": 209350 }, { "epoch": 2.06, "grad_norm": 12.581755638122559, "learning_rate": 1.564189655172414e-06, "loss": 0.1324, "step": 209375 }, { "epoch": 2.06, "grad_norm": 3.564912796020508, "learning_rate": 1.5637586206896553e-06, "loss": 0.2102, "step": 209400 }, { "epoch": 2.06, "grad_norm": 12.742575645446777, "learning_rate": 1.5633275862068967e-06, "loss": 0.0926, "step": 209425 }, { "epoch": 2.06, "grad_norm": 4.832411289215088, "learning_rate": 1.5628965517241382e-06, "loss": 0.205, "step": 209450 }, { "epoch": 2.06, "grad_norm": 4.272007942199707, "learning_rate": 1.5624655172413794e-06, "loss": 0.0853, "step": 209475 }, { "epoch": 2.06, "grad_norm": 4.746196746826172, "learning_rate": 1.5620344827586209e-06, "loss": 0.189, "step": 209500 }, { "epoch": 2.06, "grad_norm": 9.550647735595703, "learning_rate": 1.5616034482758621e-06, "loss": 0.0748, "step": 209525 }, { "epoch": 2.06, "grad_norm": 5.2247724533081055, "learning_rate": 1.5611724137931036e-06, "loss": 0.2048, "step": 209550 }, { "epoch": 2.06, "grad_norm": 4.597546100616455, "learning_rate": 1.5607413793103448e-06, "loss": 0.0853, "step": 209575 }, { "epoch": 2.06, "grad_norm": 3.9395668506622314, "learning_rate": 1.5603103448275863e-06, "loss": 0.1884, "step": 209600 }, { "epoch": 2.06, "grad_norm": 5.415232181549072, "learning_rate": 1.5598793103448275e-06, "loss": 0.0829, "step": 209625 }, { "epoch": 2.06, "grad_norm": 4.242384433746338, "learning_rate": 1.559448275862069e-06, "loss": 0.1997, "step": 209650 }, { "epoch": 2.06, "grad_norm": 11.731938362121582, "learning_rate": 1.5590172413793107e-06, "loss": 0.0714, "step": 209675 }, { "epoch": 2.06, "grad_norm": 4.849601745605469, "learning_rate": 1.558586206896552e-06, "loss": 0.1937, "step": 209700 }, { "epoch": 2.06, "grad_norm": 8.642897605895996, "learning_rate": 1.5581551724137934e-06, "loss": 0.1016, "step": 209725 }, { "epoch": 2.06, "grad_norm": 5.375568389892578, "learning_rate": 1.5577241379310346e-06, "loss": 0.1706, "step": 209750 }, { "epoch": 2.06, "grad_norm": 9.165555953979492, "learning_rate": 1.557293103448276e-06, "loss": 0.0937, "step": 209775 }, { "epoch": 2.06, "grad_norm": 3.8694000244140625, "learning_rate": 1.5568620689655173e-06, "loss": 0.1667, "step": 209800 }, { "epoch": 2.06, "grad_norm": 8.040675163269043, "learning_rate": 1.5564310344827588e-06, "loss": 0.076, "step": 209825 }, { "epoch": 2.06, "grad_norm": 6.082780361175537, "learning_rate": 1.556e-06, "loss": 0.2205, "step": 209850 }, { "epoch": 2.06, "grad_norm": 12.041570663452148, "learning_rate": 1.5555689655172415e-06, "loss": 0.1104, "step": 209875 }, { "epoch": 2.06, "grad_norm": 3.5284829139709473, "learning_rate": 1.555137931034483e-06, "loss": 0.2249, "step": 209900 }, { "epoch": 2.06, "grad_norm": 9.017595291137695, "learning_rate": 1.5547068965517242e-06, "loss": 0.1094, "step": 209925 }, { "epoch": 2.06, "grad_norm": 4.067469596862793, "learning_rate": 1.5542758620689658e-06, "loss": 0.1839, "step": 209950 }, { "epoch": 2.07, "grad_norm": 11.956036567687988, "learning_rate": 1.5538448275862069e-06, "loss": 0.122, "step": 209975 }, { "epoch": 2.07, "grad_norm": 4.080542087554932, "learning_rate": 1.5534137931034485e-06, "loss": 0.1703, "step": 210000 }, { "epoch": 2.07, "grad_norm": 6.256478309631348, "learning_rate": 1.5529827586206898e-06, "loss": 0.1107, "step": 210025 }, { "epoch": 2.07, "grad_norm": 4.187394618988037, "learning_rate": 1.5525517241379312e-06, "loss": 0.2573, "step": 210050 }, { "epoch": 2.07, "grad_norm": 6.825033664703369, "learning_rate": 1.5521206896551725e-06, "loss": 0.0962, "step": 210075 }, { "epoch": 2.07, "grad_norm": 4.043802261352539, "learning_rate": 1.551689655172414e-06, "loss": 0.1953, "step": 210100 }, { "epoch": 2.07, "grad_norm": 10.848024368286133, "learning_rate": 1.5512586206896554e-06, "loss": 0.0742, "step": 210125 }, { "epoch": 2.07, "grad_norm": 4.011955261230469, "learning_rate": 1.5508275862068966e-06, "loss": 0.1946, "step": 210150 }, { "epoch": 2.07, "grad_norm": 6.0371551513671875, "learning_rate": 1.550396551724138e-06, "loss": 0.083, "step": 210175 }, { "epoch": 2.07, "grad_norm": 4.063576698303223, "learning_rate": 1.5499655172413793e-06, "loss": 0.2025, "step": 210200 }, { "epoch": 2.07, "grad_norm": 5.230865955352783, "learning_rate": 1.5495344827586208e-06, "loss": 0.0774, "step": 210225 }, { "epoch": 2.07, "grad_norm": 2.9983506202697754, "learning_rate": 1.549103448275862e-06, "loss": 0.2011, "step": 210250 }, { "epoch": 2.07, "grad_norm": 9.768787384033203, "learning_rate": 1.5486724137931037e-06, "loss": 0.103, "step": 210275 }, { "epoch": 2.07, "grad_norm": 5.080224514007568, "learning_rate": 1.5482413793103447e-06, "loss": 0.1721, "step": 210300 }, { "epoch": 2.07, "grad_norm": 2.0840094089508057, "learning_rate": 1.5478103448275864e-06, "loss": 0.0794, "step": 210325 }, { "epoch": 2.07, "grad_norm": 3.800854206085205, "learning_rate": 1.5473793103448277e-06, "loss": 0.216, "step": 210350 }, { "epoch": 2.07, "grad_norm": 7.589548587799072, "learning_rate": 1.5469482758620691e-06, "loss": 0.0794, "step": 210375 }, { "epoch": 2.07, "grad_norm": 3.5328171253204346, "learning_rate": 1.5465172413793106e-06, "loss": 0.1889, "step": 210400 }, { "epoch": 2.07, "grad_norm": 4.460169315338135, "learning_rate": 1.5460862068965518e-06, "loss": 0.0759, "step": 210425 }, { "epoch": 2.07, "grad_norm": 5.315786361694336, "learning_rate": 1.5456551724137933e-06, "loss": 0.2206, "step": 210450 }, { "epoch": 2.07, "grad_norm": 11.505115509033203, "learning_rate": 1.5452241379310345e-06, "loss": 0.098, "step": 210475 }, { "epoch": 2.07, "grad_norm": 4.640589237213135, "learning_rate": 1.544793103448276e-06, "loss": 0.2256, "step": 210500 }, { "epoch": 2.07, "grad_norm": 1.316407561302185, "learning_rate": 1.5443620689655172e-06, "loss": 0.0744, "step": 210525 }, { "epoch": 2.07, "grad_norm": 4.613112449645996, "learning_rate": 1.5439310344827587e-06, "loss": 0.1863, "step": 210550 }, { "epoch": 2.07, "grad_norm": 5.691892147064209, "learning_rate": 1.5435e-06, "loss": 0.0844, "step": 210575 }, { "epoch": 2.07, "grad_norm": 3.513533353805542, "learning_rate": 1.5430862068965518e-06, "loss": 0.2324, "step": 210600 }, { "epoch": 2.07, "grad_norm": 7.019201755523682, "learning_rate": 1.5426551724137935e-06, "loss": 0.1028, "step": 210625 }, { "epoch": 2.07, "grad_norm": 5.437453746795654, "learning_rate": 1.5422241379310345e-06, "loss": 0.2192, "step": 210650 }, { "epoch": 2.07, "grad_norm": 12.547213554382324, "learning_rate": 1.5417931034482762e-06, "loss": 0.0991, "step": 210675 }, { "epoch": 2.07, "grad_norm": 3.6304051876068115, "learning_rate": 1.5413620689655174e-06, "loss": 0.217, "step": 210700 }, { "epoch": 2.07, "grad_norm": 10.457626342773438, "learning_rate": 1.5409310344827589e-06, "loss": 0.0805, "step": 210725 }, { "epoch": 2.07, "grad_norm": 5.062815189361572, "learning_rate": 1.5405000000000001e-06, "loss": 0.2095, "step": 210750 }, { "epoch": 2.07, "grad_norm": 11.672981262207031, "learning_rate": 1.5400689655172416e-06, "loss": 0.1011, "step": 210775 }, { "epoch": 2.07, "grad_norm": 4.306277275085449, "learning_rate": 1.5396379310344828e-06, "loss": 0.2216, "step": 210800 }, { "epoch": 2.07, "grad_norm": 7.288933753967285, "learning_rate": 1.5392068965517243e-06, "loss": 0.0764, "step": 210825 }, { "epoch": 2.07, "grad_norm": 3.678443193435669, "learning_rate": 1.5387758620689655e-06, "loss": 0.2023, "step": 210850 }, { "epoch": 2.07, "grad_norm": 6.116937637329102, "learning_rate": 1.538344827586207e-06, "loss": 0.09, "step": 210875 }, { "epoch": 2.07, "grad_norm": 4.77712345123291, "learning_rate": 1.5379137931034484e-06, "loss": 0.1912, "step": 210900 }, { "epoch": 2.07, "grad_norm": 8.17403793334961, "learning_rate": 1.5374827586206897e-06, "loss": 0.0772, "step": 210925 }, { "epoch": 2.07, "grad_norm": 2.800786256790161, "learning_rate": 1.5370517241379313e-06, "loss": 0.2107, "step": 210950 }, { "epoch": 2.08, "grad_norm": 9.863103866577148, "learning_rate": 1.5366206896551724e-06, "loss": 0.1216, "step": 210975 }, { "epoch": 2.08, "grad_norm": 4.614190101623535, "learning_rate": 1.536189655172414e-06, "loss": 0.1891, "step": 211000 }, { "epoch": 2.08, "grad_norm": 3.692059278488159, "learning_rate": 1.5357586206896553e-06, "loss": 0.0885, "step": 211025 }, { "epoch": 2.08, "grad_norm": 3.120770215988159, "learning_rate": 1.5353275862068967e-06, "loss": 0.2206, "step": 211050 }, { "epoch": 2.08, "grad_norm": 6.6826934814453125, "learning_rate": 1.534896551724138e-06, "loss": 0.0821, "step": 211075 }, { "epoch": 2.08, "grad_norm": 4.050808429718018, "learning_rate": 1.5344655172413794e-06, "loss": 0.1837, "step": 211100 }, { "epoch": 2.08, "grad_norm": 6.067375183105469, "learning_rate": 1.534034482758621e-06, "loss": 0.0853, "step": 211125 }, { "epoch": 2.08, "grad_norm": 4.606245517730713, "learning_rate": 1.5336034482758621e-06, "loss": 0.2006, "step": 211150 }, { "epoch": 2.08, "grad_norm": 5.704469203948975, "learning_rate": 1.5331724137931036e-06, "loss": 0.1079, "step": 211175 }, { "epoch": 2.08, "grad_norm": 5.565290927886963, "learning_rate": 1.5327413793103449e-06, "loss": 0.2336, "step": 211200 }, { "epoch": 2.08, "grad_norm": 10.921886444091797, "learning_rate": 1.5323103448275863e-06, "loss": 0.084, "step": 211225 }, { "epoch": 2.08, "grad_norm": 3.691983699798584, "learning_rate": 1.5318793103448276e-06, "loss": 0.202, "step": 211250 }, { "epoch": 2.08, "grad_norm": 5.120480060577393, "learning_rate": 1.5314482758620692e-06, "loss": 0.0985, "step": 211275 }, { "epoch": 2.08, "grad_norm": 3.4942190647125244, "learning_rate": 1.5310172413793103e-06, "loss": 0.2332, "step": 211300 }, { "epoch": 2.08, "grad_norm": 9.375436782836914, "learning_rate": 1.530586206896552e-06, "loss": 0.0867, "step": 211325 }, { "epoch": 2.08, "grad_norm": 6.041067600250244, "learning_rate": 1.5301551724137934e-06, "loss": 0.2145, "step": 211350 }, { "epoch": 2.08, "grad_norm": 4.441258430480957, "learning_rate": 1.5297241379310346e-06, "loss": 0.0962, "step": 211375 }, { "epoch": 2.08, "grad_norm": 4.499199390411377, "learning_rate": 1.529293103448276e-06, "loss": 0.2327, "step": 211400 }, { "epoch": 2.08, "grad_norm": 9.568520545959473, "learning_rate": 1.5288620689655173e-06, "loss": 0.0902, "step": 211425 }, { "epoch": 2.08, "grad_norm": 11.852845191955566, "learning_rate": 1.5284310344827588e-06, "loss": 0.2216, "step": 211450 }, { "epoch": 2.08, "grad_norm": 7.414396286010742, "learning_rate": 1.528e-06, "loss": 0.0876, "step": 211475 }, { "epoch": 2.08, "grad_norm": 5.234945297241211, "learning_rate": 1.5275689655172415e-06, "loss": 0.2316, "step": 211500 }, { "epoch": 2.08, "grad_norm": 7.735350131988525, "learning_rate": 1.5271379310344827e-06, "loss": 0.1003, "step": 211525 }, { "epoch": 2.08, "grad_norm": 10.592012405395508, "learning_rate": 1.5267068965517242e-06, "loss": 0.1753, "step": 211550 }, { "epoch": 2.08, "grad_norm": 11.201618194580078, "learning_rate": 1.5262758620689658e-06, "loss": 0.1084, "step": 211575 }, { "epoch": 2.08, "grad_norm": 5.078563690185547, "learning_rate": 1.525844827586207e-06, "loss": 0.2513, "step": 211600 }, { "epoch": 2.08, "grad_norm": 6.666827201843262, "learning_rate": 1.5254137931034486e-06, "loss": 0.1035, "step": 211625 }, { "epoch": 2.08, "grad_norm": 4.738763332366943, "learning_rate": 1.5249827586206898e-06, "loss": 0.2064, "step": 211650 }, { "epoch": 2.08, "grad_norm": 8.638925552368164, "learning_rate": 1.5245517241379313e-06, "loss": 0.1221, "step": 211675 }, { "epoch": 2.08, "grad_norm": 5.196473598480225, "learning_rate": 1.5241206896551725e-06, "loss": 0.1983, "step": 211700 }, { "epoch": 2.08, "grad_norm": 13.484312057495117, "learning_rate": 1.523689655172414e-06, "loss": 0.0907, "step": 211725 }, { "epoch": 2.08, "grad_norm": 3.746727705001831, "learning_rate": 1.5232586206896552e-06, "loss": 0.2065, "step": 211750 }, { "epoch": 2.08, "grad_norm": 6.553892612457275, "learning_rate": 1.5228275862068967e-06, "loss": 0.0903, "step": 211775 }, { "epoch": 2.08, "grad_norm": 4.036787509918213, "learning_rate": 1.5223965517241381e-06, "loss": 0.2347, "step": 211800 }, { "epoch": 2.08, "grad_norm": 9.377732276916504, "learning_rate": 1.5219655172413794e-06, "loss": 0.0815, "step": 211825 }, { "epoch": 2.08, "grad_norm": 3.754146099090576, "learning_rate": 1.521534482758621e-06, "loss": 0.2173, "step": 211850 }, { "epoch": 2.08, "grad_norm": 5.05708646774292, "learning_rate": 1.521103448275862e-06, "loss": 0.0664, "step": 211875 }, { "epoch": 2.08, "grad_norm": 8.478432655334473, "learning_rate": 1.5206724137931037e-06, "loss": 0.2151, "step": 211900 }, { "epoch": 2.08, "grad_norm": 1.92911958694458, "learning_rate": 1.520241379310345e-06, "loss": 0.0922, "step": 211925 }, { "epoch": 2.08, "grad_norm": 3.708538293838501, "learning_rate": 1.5198103448275864e-06, "loss": 0.1942, "step": 211950 }, { "epoch": 2.08, "grad_norm": 6.810051441192627, "learning_rate": 1.5193793103448277e-06, "loss": 0.1072, "step": 211975 }, { "epoch": 2.09, "grad_norm": 6.0955657958984375, "learning_rate": 1.5189482758620691e-06, "loss": 0.2325, "step": 212000 }, { "epoch": 2.09, "grad_norm": 6.515986442565918, "learning_rate": 1.5185172413793104e-06, "loss": 0.1067, "step": 212025 }, { "epoch": 2.09, "grad_norm": 9.376603126525879, "learning_rate": 1.5180862068965518e-06, "loss": 0.2275, "step": 212050 }, { "epoch": 2.09, "grad_norm": 6.70069694519043, "learning_rate": 1.5176551724137933e-06, "loss": 0.0911, "step": 212075 }, { "epoch": 2.09, "grad_norm": 4.6877121925354, "learning_rate": 1.5172241379310345e-06, "loss": 0.2316, "step": 212100 }, { "epoch": 2.09, "grad_norm": 10.423256874084473, "learning_rate": 1.516793103448276e-06, "loss": 0.1116, "step": 212125 }, { "epoch": 2.09, "grad_norm": 4.495413303375244, "learning_rate": 1.5163620689655172e-06, "loss": 0.1715, "step": 212150 }, { "epoch": 2.09, "grad_norm": 13.444143295288086, "learning_rate": 1.515931034482759e-06, "loss": 0.1022, "step": 212175 }, { "epoch": 2.09, "grad_norm": 4.0767292976379395, "learning_rate": 1.5155e-06, "loss": 0.1969, "step": 212200 }, { "epoch": 2.09, "grad_norm": 6.586531162261963, "learning_rate": 1.5150689655172416e-06, "loss": 0.115, "step": 212225 }, { "epoch": 2.09, "grad_norm": 12.068845748901367, "learning_rate": 1.5146379310344828e-06, "loss": 0.2069, "step": 212250 }, { "epoch": 2.09, "grad_norm": 6.354916572570801, "learning_rate": 1.5142068965517243e-06, "loss": 0.0624, "step": 212275 }, { "epoch": 2.09, "grad_norm": 6.362011909484863, "learning_rate": 1.5137758620689658e-06, "loss": 0.165, "step": 212300 }, { "epoch": 2.09, "grad_norm": 4.818583965301514, "learning_rate": 1.513344827586207e-06, "loss": 0.0938, "step": 212325 }, { "epoch": 2.09, "grad_norm": 4.575013637542725, "learning_rate": 1.5129137931034485e-06, "loss": 0.1795, "step": 212350 }, { "epoch": 2.09, "grad_norm": 4.156137943267822, "learning_rate": 1.5124827586206897e-06, "loss": 0.1095, "step": 212375 }, { "epoch": 2.09, "grad_norm": 3.638249635696411, "learning_rate": 1.5120517241379312e-06, "loss": 0.2034, "step": 212400 }, { "epoch": 2.09, "grad_norm": 6.100317001342773, "learning_rate": 1.5116206896551724e-06, "loss": 0.1069, "step": 212425 }, { "epoch": 2.09, "grad_norm": 3.422905445098877, "learning_rate": 1.5111896551724139e-06, "loss": 0.1695, "step": 212450 }, { "epoch": 2.09, "grad_norm": 10.630404472351074, "learning_rate": 1.5107586206896551e-06, "loss": 0.1183, "step": 212475 }, { "epoch": 2.09, "grad_norm": 4.568780422210693, "learning_rate": 1.5103275862068968e-06, "loss": 0.2276, "step": 212500 }, { "epoch": 2.09, "grad_norm": 8.228049278259277, "learning_rate": 1.5098965517241382e-06, "loss": 0.093, "step": 212525 }, { "epoch": 2.09, "grad_norm": 4.052206039428711, "learning_rate": 1.5094655172413795e-06, "loss": 0.2094, "step": 212550 }, { "epoch": 2.09, "grad_norm": 8.666744232177734, "learning_rate": 1.509034482758621e-06, "loss": 0.1079, "step": 212575 }, { "epoch": 2.09, "grad_norm": 4.830175876617432, "learning_rate": 1.5086034482758622e-06, "loss": 0.1965, "step": 212600 }, { "epoch": 2.09, "grad_norm": 7.420411109924316, "learning_rate": 1.5081724137931036e-06, "loss": 0.072, "step": 212625 }, { "epoch": 2.09, "grad_norm": 6.04510498046875, "learning_rate": 1.5077413793103449e-06, "loss": 0.2403, "step": 212650 }, { "epoch": 2.09, "grad_norm": 4.9995293617248535, "learning_rate": 1.5073103448275863e-06, "loss": 0.0747, "step": 212675 }, { "epoch": 2.09, "grad_norm": 3.908679485321045, "learning_rate": 1.5068793103448276e-06, "loss": 0.1954, "step": 212700 }, { "epoch": 2.09, "grad_norm": 8.955790519714355, "learning_rate": 1.506448275862069e-06, "loss": 0.0787, "step": 212725 }, { "epoch": 2.09, "grad_norm": 3.4592878818511963, "learning_rate": 1.5060172413793107e-06, "loss": 0.214, "step": 212750 }, { "epoch": 2.09, "grad_norm": 4.25701379776001, "learning_rate": 1.5055862068965517e-06, "loss": 0.0832, "step": 212775 }, { "epoch": 2.09, "grad_norm": 8.951177597045898, "learning_rate": 1.5051551724137934e-06, "loss": 0.2111, "step": 212800 }, { "epoch": 2.09, "grad_norm": 5.216668128967285, "learning_rate": 1.5047241379310346e-06, "loss": 0.1129, "step": 212825 }, { "epoch": 2.09, "grad_norm": 3.29591703414917, "learning_rate": 1.5043103448275863e-06, "loss": 0.2127, "step": 212850 }, { "epoch": 2.09, "grad_norm": 8.703904151916504, "learning_rate": 1.5038793103448276e-06, "loss": 0.0872, "step": 212875 }, { "epoch": 2.09, "grad_norm": 4.964531421661377, "learning_rate": 1.5034482758620692e-06, "loss": 0.2126, "step": 212900 }, { "epoch": 2.09, "grad_norm": 6.043702125549316, "learning_rate": 1.5030172413793105e-06, "loss": 0.1145, "step": 212925 }, { "epoch": 2.09, "grad_norm": 4.874965667724609, "learning_rate": 1.502586206896552e-06, "loss": 0.235, "step": 212950 }, { "epoch": 2.09, "grad_norm": 5.144018173217773, "learning_rate": 1.5021551724137932e-06, "loss": 0.0727, "step": 212975 }, { "epoch": 2.09, "grad_norm": 3.7837438583374023, "learning_rate": 1.5017241379310346e-06, "loss": 0.1836, "step": 213000 }, { "epoch": 2.1, "grad_norm": 5.970877170562744, "learning_rate": 1.501293103448276e-06, "loss": 0.0932, "step": 213025 }, { "epoch": 2.1, "grad_norm": 4.326685428619385, "learning_rate": 1.5008620689655173e-06, "loss": 0.2206, "step": 213050 }, { "epoch": 2.1, "grad_norm": 11.190059661865234, "learning_rate": 1.5004310344827588e-06, "loss": 0.1007, "step": 213075 }, { "epoch": 2.1, "grad_norm": 4.339849948883057, "learning_rate": 1.5e-06, "loss": 0.2026, "step": 213100 }, { "epoch": 2.1, "grad_norm": 6.352441787719727, "learning_rate": 1.4995689655172415e-06, "loss": 0.0934, "step": 213125 }, { "epoch": 2.1, "grad_norm": 4.847686290740967, "learning_rate": 1.4991379310344827e-06, "loss": 0.2416, "step": 213150 }, { "epoch": 2.1, "grad_norm": 7.4032511711120605, "learning_rate": 1.4987068965517242e-06, "loss": 0.0914, "step": 213175 }, { "epoch": 2.1, "grad_norm": 4.353194236755371, "learning_rate": 1.4982758620689654e-06, "loss": 0.2427, "step": 213200 }, { "epoch": 2.1, "grad_norm": 6.846559047698975, "learning_rate": 1.4978448275862071e-06, "loss": 0.1056, "step": 213225 }, { "epoch": 2.1, "grad_norm": 3.8679001331329346, "learning_rate": 1.4974137931034486e-06, "loss": 0.1956, "step": 213250 }, { "epoch": 2.1, "grad_norm": 11.707329750061035, "learning_rate": 1.4969827586206898e-06, "loss": 0.0895, "step": 213275 }, { "epoch": 2.1, "grad_norm": 5.384698867797852, "learning_rate": 1.4965517241379313e-06, "loss": 0.198, "step": 213300 }, { "epoch": 2.1, "grad_norm": 10.11719799041748, "learning_rate": 1.4961206896551725e-06, "loss": 0.0631, "step": 213325 }, { "epoch": 2.1, "grad_norm": 5.740279197692871, "learning_rate": 1.4957068965517244e-06, "loss": 0.2161, "step": 213350 }, { "epoch": 2.1, "grad_norm": 10.197783470153809, "learning_rate": 1.4952758620689656e-06, "loss": 0.0792, "step": 213375 }, { "epoch": 2.1, "grad_norm": 4.392314910888672, "learning_rate": 1.494844827586207e-06, "loss": 0.2062, "step": 213400 }, { "epoch": 2.1, "grad_norm": 3.7957100868225098, "learning_rate": 1.4944137931034483e-06, "loss": 0.0863, "step": 213425 }, { "epoch": 2.1, "grad_norm": 5.776305198669434, "learning_rate": 1.4939827586206898e-06, "loss": 0.2355, "step": 213450 }, { "epoch": 2.1, "grad_norm": 7.969460487365723, "learning_rate": 1.493551724137931e-06, "loss": 0.1281, "step": 213475 }, { "epoch": 2.1, "grad_norm": 3.103020668029785, "learning_rate": 1.4931206896551725e-06, "loss": 0.221, "step": 213500 }, { "epoch": 2.1, "grad_norm": 10.234057426452637, "learning_rate": 1.492689655172414e-06, "loss": 0.0729, "step": 213525 }, { "epoch": 2.1, "grad_norm": 4.666042327880859, "learning_rate": 1.4922586206896552e-06, "loss": 0.1989, "step": 213550 }, { "epoch": 2.1, "grad_norm": 8.12960433959961, "learning_rate": 1.4918275862068969e-06, "loss": 0.1122, "step": 213575 }, { "epoch": 2.1, "grad_norm": 4.185224533081055, "learning_rate": 1.491396551724138e-06, "loss": 0.2057, "step": 213600 }, { "epoch": 2.1, "grad_norm": 2.800135612487793, "learning_rate": 1.4909655172413796e-06, "loss": 0.0565, "step": 213625 }, { "epoch": 2.1, "grad_norm": 3.867455244064331, "learning_rate": 1.4905344827586208e-06, "loss": 0.2315, "step": 213650 }, { "epoch": 2.1, "grad_norm": 5.801549434661865, "learning_rate": 1.4901034482758623e-06, "loss": 0.0933, "step": 213675 }, { "epoch": 2.1, "grad_norm": 4.842745780944824, "learning_rate": 1.4896724137931035e-06, "loss": 0.2298, "step": 213700 }, { "epoch": 2.1, "grad_norm": 7.394084453582764, "learning_rate": 1.489241379310345e-06, "loss": 0.0876, "step": 213725 }, { "epoch": 2.1, "grad_norm": 3.378751039505005, "learning_rate": 1.4888103448275864e-06, "loss": 0.2393, "step": 213750 }, { "epoch": 2.1, "grad_norm": 3.6659326553344727, "learning_rate": 1.4883793103448277e-06, "loss": 0.089, "step": 213775 }, { "epoch": 2.1, "grad_norm": 4.160707473754883, "learning_rate": 1.4879482758620691e-06, "loss": 0.2007, "step": 213800 }, { "epoch": 2.1, "grad_norm": 8.807107925415039, "learning_rate": 1.4875172413793104e-06, "loss": 0.0996, "step": 213825 }, { "epoch": 2.1, "grad_norm": 13.000506401062012, "learning_rate": 1.4870862068965518e-06, "loss": 0.1778, "step": 213850 }, { "epoch": 2.1, "grad_norm": 7.237987518310547, "learning_rate": 1.486655172413793e-06, "loss": 0.1152, "step": 213875 }, { "epoch": 2.1, "grad_norm": 3.0361082553863525, "learning_rate": 1.4862241379310347e-06, "loss": 0.2163, "step": 213900 }, { "epoch": 2.1, "grad_norm": 8.891284942626953, "learning_rate": 1.4857931034482758e-06, "loss": 0.0807, "step": 213925 }, { "epoch": 2.1, "grad_norm": 3.9029290676116943, "learning_rate": 1.4853620689655174e-06, "loss": 0.1822, "step": 213950 }, { "epoch": 2.1, "grad_norm": 7.775481700897217, "learning_rate": 1.4849310344827587e-06, "loss": 0.0836, "step": 213975 }, { "epoch": 2.1, "grad_norm": 3.830305814743042, "learning_rate": 1.4845000000000001e-06, "loss": 0.206, "step": 214000 }, { "epoch": 2.11, "grad_norm": 10.635461807250977, "learning_rate": 1.4840689655172416e-06, "loss": 0.0639, "step": 214025 }, { "epoch": 2.11, "grad_norm": 3.95503568649292, "learning_rate": 1.4836379310344828e-06, "loss": 0.2143, "step": 214050 }, { "epoch": 2.11, "grad_norm": 7.795060634613037, "learning_rate": 1.4832068965517243e-06, "loss": 0.1025, "step": 214075 }, { "epoch": 2.11, "grad_norm": 6.439891815185547, "learning_rate": 1.4827758620689655e-06, "loss": 0.2236, "step": 214100 }, { "epoch": 2.11, "grad_norm": 0.5402824282646179, "learning_rate": 1.482344827586207e-06, "loss": 0.0841, "step": 214125 }, { "epoch": 2.11, "grad_norm": 6.304648399353027, "learning_rate": 1.4819137931034482e-06, "loss": 0.1879, "step": 214150 }, { "epoch": 2.11, "grad_norm": 7.7961273193359375, "learning_rate": 1.4814827586206897e-06, "loss": 0.1025, "step": 214175 }, { "epoch": 2.11, "grad_norm": 2.7381646633148193, "learning_rate": 1.481051724137931e-06, "loss": 0.2088, "step": 214200 }, { "epoch": 2.11, "grad_norm": 10.230452537536621, "learning_rate": 1.4806206896551726e-06, "loss": 0.0875, "step": 214225 }, { "epoch": 2.11, "grad_norm": 5.521280765533447, "learning_rate": 1.480189655172414e-06, "loss": 0.1866, "step": 214250 }, { "epoch": 2.11, "grad_norm": 8.754709243774414, "learning_rate": 1.4797586206896553e-06, "loss": 0.0963, "step": 214275 }, { "epoch": 2.11, "grad_norm": 4.21814489364624, "learning_rate": 1.4793275862068968e-06, "loss": 0.2222, "step": 214300 }, { "epoch": 2.11, "grad_norm": 9.475939750671387, "learning_rate": 1.478896551724138e-06, "loss": 0.0842, "step": 214325 }, { "epoch": 2.11, "grad_norm": 4.117952823638916, "learning_rate": 1.4784655172413795e-06, "loss": 0.1996, "step": 214350 }, { "epoch": 2.11, "grad_norm": 10.332314491271973, "learning_rate": 1.4780344827586207e-06, "loss": 0.1107, "step": 214375 }, { "epoch": 2.11, "grad_norm": 6.388282775878906, "learning_rate": 1.4776034482758622e-06, "loss": 0.3135, "step": 214400 }, { "epoch": 2.11, "grad_norm": 5.751850128173828, "learning_rate": 1.4771724137931034e-06, "loss": 0.0984, "step": 214425 }, { "epoch": 2.11, "grad_norm": 3.9066874980926514, "learning_rate": 1.4767413793103449e-06, "loss": 0.1745, "step": 214450 }, { "epoch": 2.11, "grad_norm": 4.704862594604492, "learning_rate": 1.4763103448275865e-06, "loss": 0.0899, "step": 214475 }, { "epoch": 2.11, "grad_norm": 4.1008734703063965, "learning_rate": 1.4758793103448276e-06, "loss": 0.1999, "step": 214500 }, { "epoch": 2.11, "grad_norm": 12.328300476074219, "learning_rate": 1.4754482758620692e-06, "loss": 0.1124, "step": 214525 }, { "epoch": 2.11, "grad_norm": 4.706293106079102, "learning_rate": 1.4750172413793105e-06, "loss": 0.2332, "step": 214550 }, { "epoch": 2.11, "grad_norm": 11.620747566223145, "learning_rate": 1.474586206896552e-06, "loss": 0.1224, "step": 214575 }, { "epoch": 2.11, "grad_norm": 4.403857707977295, "learning_rate": 1.4741551724137932e-06, "loss": 0.1894, "step": 214600 }, { "epoch": 2.11, "grad_norm": 6.860311508178711, "learning_rate": 1.4737241379310346e-06, "loss": 0.0976, "step": 214625 }, { "epoch": 2.11, "grad_norm": 6.264274597167969, "learning_rate": 1.473293103448276e-06, "loss": 0.1966, "step": 214650 }, { "epoch": 2.11, "grad_norm": 4.851132392883301, "learning_rate": 1.4728620689655174e-06, "loss": 0.0805, "step": 214675 }, { "epoch": 2.11, "grad_norm": 4.554104804992676, "learning_rate": 1.4724310344827588e-06, "loss": 0.1995, "step": 214700 }, { "epoch": 2.11, "grad_norm": 2.0177500247955322, "learning_rate": 1.472e-06, "loss": 0.0798, "step": 214725 }, { "epoch": 2.11, "grad_norm": 4.62809419631958, "learning_rate": 1.4715689655172415e-06, "loss": 0.1945, "step": 214750 }, { "epoch": 2.11, "grad_norm": 5.6867194175720215, "learning_rate": 1.4711379310344828e-06, "loss": 0.1225, "step": 214775 }, { "epoch": 2.11, "grad_norm": 6.982128620147705, "learning_rate": 1.4707068965517244e-06, "loss": 0.1862, "step": 214800 }, { "epoch": 2.11, "grad_norm": 5.0674285888671875, "learning_rate": 1.4702758620689655e-06, "loss": 0.0947, "step": 214825 }, { "epoch": 2.11, "grad_norm": 4.072627544403076, "learning_rate": 1.4698448275862071e-06, "loss": 0.2409, "step": 214850 }, { "epoch": 2.11, "grad_norm": 1.562981367111206, "learning_rate": 1.4694137931034484e-06, "loss": 0.1039, "step": 214875 }, { "epoch": 2.11, "grad_norm": 4.226415157318115, "learning_rate": 1.4689827586206898e-06, "loss": 0.2379, "step": 214900 }, { "epoch": 2.11, "grad_norm": 5.8641886711120605, "learning_rate": 1.4685517241379313e-06, "loss": 0.0683, "step": 214925 }, { "epoch": 2.11, "grad_norm": 4.871860980987549, "learning_rate": 1.4681206896551725e-06, "loss": 0.2091, "step": 214950 }, { "epoch": 2.11, "grad_norm": 5.452811241149902, "learning_rate": 1.467689655172414e-06, "loss": 0.1044, "step": 214975 }, { "epoch": 2.11, "grad_norm": 5.289492130279541, "learning_rate": 1.4672586206896552e-06, "loss": 0.1946, "step": 215000 }, { "epoch": 2.11, "grad_norm": 8.168548583984375, "learning_rate": 1.4668275862068967e-06, "loss": 0.0739, "step": 215025 }, { "epoch": 2.12, "grad_norm": 4.619662284851074, "learning_rate": 1.466396551724138e-06, "loss": 0.2276, "step": 215050 }, { "epoch": 2.12, "grad_norm": 9.09640121459961, "learning_rate": 1.4659655172413794e-06, "loss": 0.0891, "step": 215075 }, { "epoch": 2.12, "grad_norm": 3.9024624824523926, "learning_rate": 1.4655344827586206e-06, "loss": 0.1918, "step": 215100 }, { "epoch": 2.12, "grad_norm": 3.073367118835449, "learning_rate": 1.4651034482758623e-06, "loss": 0.0837, "step": 215125 }, { "epoch": 2.12, "grad_norm": 5.125697612762451, "learning_rate": 1.4646724137931033e-06, "loss": 0.2137, "step": 215150 }, { "epoch": 2.12, "grad_norm": 9.180248260498047, "learning_rate": 1.464241379310345e-06, "loss": 0.0754, "step": 215175 }, { "epoch": 2.12, "grad_norm": 3.4952480792999268, "learning_rate": 1.4638103448275865e-06, "loss": 0.2125, "step": 215200 }, { "epoch": 2.12, "grad_norm": 12.145590782165527, "learning_rate": 1.4633793103448277e-06, "loss": 0.1026, "step": 215225 }, { "epoch": 2.12, "grad_norm": 3.742563009262085, "learning_rate": 1.4629482758620692e-06, "loss": 0.2278, "step": 215250 }, { "epoch": 2.12, "grad_norm": 5.262095928192139, "learning_rate": 1.4625172413793104e-06, "loss": 0.0787, "step": 215275 }, { "epoch": 2.12, "grad_norm": 4.140275001525879, "learning_rate": 1.4620862068965519e-06, "loss": 0.1829, "step": 215300 }, { "epoch": 2.12, "grad_norm": 6.884317874908447, "learning_rate": 1.461655172413793e-06, "loss": 0.105, "step": 215325 }, { "epoch": 2.12, "grad_norm": 3.923978328704834, "learning_rate": 1.4612241379310346e-06, "loss": 0.2146, "step": 215350 }, { "epoch": 2.12, "grad_norm": 7.1571364402771, "learning_rate": 1.4607931034482758e-06, "loss": 0.0947, "step": 215375 }, { "epoch": 2.12, "grad_norm": 5.565766334533691, "learning_rate": 1.4603620689655173e-06, "loss": 0.2018, "step": 215400 }, { "epoch": 2.12, "grad_norm": 1.7601169347763062, "learning_rate": 1.459931034482759e-06, "loss": 0.1219, "step": 215425 }, { "epoch": 2.12, "grad_norm": 4.107024669647217, "learning_rate": 1.4595000000000002e-06, "loss": 0.2048, "step": 215450 }, { "epoch": 2.12, "grad_norm": 5.353238582611084, "learning_rate": 1.4590689655172416e-06, "loss": 0.0847, "step": 215475 }, { "epoch": 2.12, "grad_norm": 4.331935882568359, "learning_rate": 1.4586379310344829e-06, "loss": 0.2181, "step": 215500 }, { "epoch": 2.12, "grad_norm": 10.640109062194824, "learning_rate": 1.4582068965517243e-06, "loss": 0.1141, "step": 215525 }, { "epoch": 2.12, "grad_norm": 3.5697579383850098, "learning_rate": 1.4577758620689656e-06, "loss": 0.2098, "step": 215550 }, { "epoch": 2.12, "grad_norm": 4.701938152313232, "learning_rate": 1.457344827586207e-06, "loss": 0.0805, "step": 215575 }, { "epoch": 2.12, "grad_norm": 5.290531635284424, "learning_rate": 1.4569137931034483e-06, "loss": 0.1836, "step": 215600 }, { "epoch": 2.12, "grad_norm": 14.58173942565918, "learning_rate": 1.4564827586206897e-06, "loss": 0.1077, "step": 215625 }, { "epoch": 2.12, "grad_norm": 4.306436061859131, "learning_rate": 1.4560517241379312e-06, "loss": 0.2459, "step": 215650 }, { "epoch": 2.12, "grad_norm": 6.551628112792969, "learning_rate": 1.4556206896551724e-06, "loss": 0.0722, "step": 215675 }, { "epoch": 2.12, "grad_norm": 2.907715320587158, "learning_rate": 1.455189655172414e-06, "loss": 0.1874, "step": 215700 }, { "epoch": 2.12, "grad_norm": 9.159305572509766, "learning_rate": 1.4547586206896551e-06, "loss": 0.0784, "step": 215725 }, { "epoch": 2.12, "grad_norm": 4.41304874420166, "learning_rate": 1.4543275862068968e-06, "loss": 0.2124, "step": 215750 }, { "epoch": 2.12, "grad_norm": 10.930219650268555, "learning_rate": 1.453896551724138e-06, "loss": 0.1041, "step": 215775 }, { "epoch": 2.12, "grad_norm": 5.306206226348877, "learning_rate": 1.4534655172413795e-06, "loss": 0.1891, "step": 215800 }, { "epoch": 2.12, "grad_norm": 6.953948497772217, "learning_rate": 1.4530344827586207e-06, "loss": 0.0855, "step": 215825 }, { "epoch": 2.12, "grad_norm": 5.202932834625244, "learning_rate": 1.4526034482758622e-06, "loss": 0.2266, "step": 215850 }, { "epoch": 2.12, "grad_norm": 8.541375160217285, "learning_rate": 1.4521724137931037e-06, "loss": 0.086, "step": 215875 }, { "epoch": 2.12, "grad_norm": 3.365875005722046, "learning_rate": 1.451741379310345e-06, "loss": 0.2293, "step": 215900 }, { "epoch": 2.12, "grad_norm": 6.806558609008789, "learning_rate": 1.4513103448275864e-06, "loss": 0.0638, "step": 215925 }, { "epoch": 2.12, "grad_norm": 8.445831298828125, "learning_rate": 1.4508793103448276e-06, "loss": 0.207, "step": 215950 }, { "epoch": 2.12, "grad_norm": 5.617287635803223, "learning_rate": 1.450448275862069e-06, "loss": 0.093, "step": 215975 }, { "epoch": 2.12, "grad_norm": 2.8272812366485596, "learning_rate": 1.4500172413793103e-06, "loss": 0.1956, "step": 216000 }, { "epoch": 2.12, "grad_norm": 12.445186614990234, "learning_rate": 1.449586206896552e-06, "loss": 0.0882, "step": 216025 }, { "epoch": 2.12, "grad_norm": 4.365021705627441, "learning_rate": 1.449155172413793e-06, "loss": 0.1731, "step": 216050 }, { "epoch": 2.13, "grad_norm": 3.23761248588562, "learning_rate": 1.4487241379310347e-06, "loss": 0.0829, "step": 216075 }, { "epoch": 2.13, "grad_norm": 5.320857524871826, "learning_rate": 1.4482931034482761e-06, "loss": 0.2211, "step": 216100 }, { "epoch": 2.13, "grad_norm": 10.754186630249023, "learning_rate": 1.4478620689655174e-06, "loss": 0.0776, "step": 216125 }, { "epoch": 2.13, "grad_norm": 3.5616118907928467, "learning_rate": 1.4474310344827588e-06, "loss": 0.18, "step": 216150 }, { "epoch": 2.13, "grad_norm": 2.597064256668091, "learning_rate": 1.447e-06, "loss": 0.0942, "step": 216175 }, { "epoch": 2.13, "grad_norm": 2.337143898010254, "learning_rate": 1.4465689655172415e-06, "loss": 0.2181, "step": 216200 }, { "epoch": 2.13, "grad_norm": 8.960710525512695, "learning_rate": 1.4461379310344828e-06, "loss": 0.0811, "step": 216225 }, { "epoch": 2.13, "grad_norm": 5.098638534545898, "learning_rate": 1.4457068965517242e-06, "loss": 0.1909, "step": 216250 }, { "epoch": 2.13, "grad_norm": 9.22921371459961, "learning_rate": 1.4452758620689655e-06, "loss": 0.1043, "step": 216275 }, { "epoch": 2.13, "grad_norm": 11.426430702209473, "learning_rate": 1.444844827586207e-06, "loss": 0.1904, "step": 216300 }, { "epoch": 2.13, "grad_norm": 11.049416542053223, "learning_rate": 1.4444137931034482e-06, "loss": 0.0932, "step": 216325 }, { "epoch": 2.13, "grad_norm": 3.5518906116485596, "learning_rate": 1.4439827586206899e-06, "loss": 0.198, "step": 216350 }, { "epoch": 2.13, "grad_norm": 5.125944137573242, "learning_rate": 1.4435517241379313e-06, "loss": 0.0751, "step": 216375 }, { "epoch": 2.13, "grad_norm": 3.6098546981811523, "learning_rate": 1.4431206896551726e-06, "loss": 0.1815, "step": 216400 }, { "epoch": 2.13, "grad_norm": 4.810217380523682, "learning_rate": 1.442689655172414e-06, "loss": 0.0822, "step": 216425 }, { "epoch": 2.13, "grad_norm": 4.2962517738342285, "learning_rate": 1.4422586206896553e-06, "loss": 0.2072, "step": 216450 }, { "epoch": 2.13, "grad_norm": 9.209589004516602, "learning_rate": 1.4418275862068967e-06, "loss": 0.1072, "step": 216475 }, { "epoch": 2.13, "grad_norm": 4.773381233215332, "learning_rate": 1.441396551724138e-06, "loss": 0.1847, "step": 216500 }, { "epoch": 2.13, "grad_norm": 18.335886001586914, "learning_rate": 1.4409655172413794e-06, "loss": 0.0995, "step": 216525 }, { "epoch": 2.13, "grad_norm": 5.072849750518799, "learning_rate": 1.4405344827586207e-06, "loss": 0.2114, "step": 216550 }, { "epoch": 2.13, "grad_norm": 7.124208450317383, "learning_rate": 1.4401034482758621e-06, "loss": 0.0689, "step": 216575 }, { "epoch": 2.13, "grad_norm": 4.486841678619385, "learning_rate": 1.4396724137931038e-06, "loss": 0.2336, "step": 216600 }, { "epoch": 2.13, "grad_norm": 7.755726337432861, "learning_rate": 1.4392413793103448e-06, "loss": 0.0839, "step": 216625 }, { "epoch": 2.13, "grad_norm": 4.29028844833374, "learning_rate": 1.4388103448275865e-06, "loss": 0.1894, "step": 216650 }, { "epoch": 2.13, "grad_norm": 11.841287612915039, "learning_rate": 1.4383793103448277e-06, "loss": 0.0694, "step": 216675 }, { "epoch": 2.13, "grad_norm": 9.780041694641113, "learning_rate": 1.4379482758620692e-06, "loss": 0.1989, "step": 216700 }, { "epoch": 2.13, "grad_norm": 4.9622111320495605, "learning_rate": 1.4375172413793104e-06, "loss": 0.0871, "step": 216725 }, { "epoch": 2.13, "grad_norm": 4.49074125289917, "learning_rate": 1.4370862068965519e-06, "loss": 0.2362, "step": 216750 }, { "epoch": 2.13, "grad_norm": 3.386068344116211, "learning_rate": 1.4366551724137931e-06, "loss": 0.0758, "step": 216775 }, { "epoch": 2.13, "grad_norm": 4.5475287437438965, "learning_rate": 1.4362241379310346e-06, "loss": 0.2018, "step": 216800 }, { "epoch": 2.13, "grad_norm": 7.73502779006958, "learning_rate": 1.435793103448276e-06, "loss": 0.0888, "step": 216825 }, { "epoch": 2.13, "grad_norm": 4.073541164398193, "learning_rate": 1.4353620689655173e-06, "loss": 0.1992, "step": 216850 }, { "epoch": 2.13, "grad_norm": 5.389820575714111, "learning_rate": 1.4349310344827587e-06, "loss": 0.0938, "step": 216875 }, { "epoch": 2.13, "grad_norm": 3.033764600753784, "learning_rate": 1.4345e-06, "loss": 0.1871, "step": 216900 }, { "epoch": 2.13, "grad_norm": 4.242451190948486, "learning_rate": 1.4340689655172417e-06, "loss": 0.1005, "step": 216925 }, { "epoch": 2.13, "grad_norm": 3.1625547409057617, "learning_rate": 1.4336379310344827e-06, "loss": 0.1933, "step": 216950 }, { "epoch": 2.13, "grad_norm": 5.661354064941406, "learning_rate": 1.4332068965517244e-06, "loss": 0.0963, "step": 216975 }, { "epoch": 2.13, "grad_norm": 5.64707088470459, "learning_rate": 1.4327758620689656e-06, "loss": 0.2223, "step": 217000 }, { "epoch": 2.13, "grad_norm": 7.105316638946533, "learning_rate": 1.432344827586207e-06, "loss": 0.1031, "step": 217025 }, { "epoch": 2.13, "grad_norm": 7.5001068115234375, "learning_rate": 1.4319137931034485e-06, "loss": 0.1866, "step": 217050 }, { "epoch": 2.14, "grad_norm": 6.420507431030273, "learning_rate": 1.4314827586206898e-06, "loss": 0.1108, "step": 217075 }, { "epoch": 2.14, "grad_norm": 4.136117458343506, "learning_rate": 1.4310517241379312e-06, "loss": 0.1917, "step": 217100 }, { "epoch": 2.14, "grad_norm": 11.673136711120605, "learning_rate": 1.4306206896551725e-06, "loss": 0.1172, "step": 217125 }, { "epoch": 2.14, "grad_norm": 4.837462425231934, "learning_rate": 1.430189655172414e-06, "loss": 0.1928, "step": 217150 }, { "epoch": 2.14, "grad_norm": 2.7858991622924805, "learning_rate": 1.4297586206896552e-06, "loss": 0.0859, "step": 217175 }, { "epoch": 2.14, "grad_norm": 4.495761394500732, "learning_rate": 1.4293275862068966e-06, "loss": 0.2309, "step": 217200 }, { "epoch": 2.14, "grad_norm": 8.750597953796387, "learning_rate": 1.4288965517241379e-06, "loss": 0.093, "step": 217225 }, { "epoch": 2.14, "grad_norm": 4.520514965057373, "learning_rate": 1.4284655172413795e-06, "loss": 0.2219, "step": 217250 }, { "epoch": 2.14, "grad_norm": 9.320510864257812, "learning_rate": 1.428034482758621e-06, "loss": 0.0902, "step": 217275 }, { "epoch": 2.14, "grad_norm": 4.443761348724365, "learning_rate": 1.4276034482758622e-06, "loss": 0.2056, "step": 217300 }, { "epoch": 2.14, "grad_norm": 8.837844848632812, "learning_rate": 1.4271724137931037e-06, "loss": 0.1065, "step": 217325 }, { "epoch": 2.14, "grad_norm": 4.158043384552002, "learning_rate": 1.426741379310345e-06, "loss": 0.1988, "step": 217350 }, { "epoch": 2.14, "grad_norm": 13.64193058013916, "learning_rate": 1.4263103448275864e-06, "loss": 0.125, "step": 217375 }, { "epoch": 2.14, "grad_norm": 3.722440004348755, "learning_rate": 1.4258793103448276e-06, "loss": 0.2157, "step": 217400 }, { "epoch": 2.14, "grad_norm": 9.960809707641602, "learning_rate": 1.425448275862069e-06, "loss": 0.0956, "step": 217425 }, { "epoch": 2.14, "grad_norm": 4.110671520233154, "learning_rate": 1.4250172413793103e-06, "loss": 0.1741, "step": 217450 }, { "epoch": 2.14, "grad_norm": 8.192168235778809, "learning_rate": 1.4245862068965518e-06, "loss": 0.0981, "step": 217475 }, { "epoch": 2.14, "grad_norm": 5.142533302307129, "learning_rate": 1.424155172413793e-06, "loss": 0.2239, "step": 217500 }, { "epoch": 2.14, "grad_norm": 1.3012689352035522, "learning_rate": 1.4237241379310345e-06, "loss": 0.1019, "step": 217525 }, { "epoch": 2.14, "grad_norm": 6.416048049926758, "learning_rate": 1.4233103448275864e-06, "loss": 0.2174, "step": 217550 }, { "epoch": 2.14, "grad_norm": 9.689000129699707, "learning_rate": 1.4228793103448276e-06, "loss": 0.0941, "step": 217575 }, { "epoch": 2.14, "grad_norm": 3.525465726852417, "learning_rate": 1.4224482758620693e-06, "loss": 0.2024, "step": 217600 }, { "epoch": 2.14, "grad_norm": 5.729111671447754, "learning_rate": 1.4220172413793103e-06, "loss": 0.0815, "step": 217625 }, { "epoch": 2.14, "grad_norm": 8.87385368347168, "learning_rate": 1.421586206896552e-06, "loss": 0.219, "step": 217650 }, { "epoch": 2.14, "grad_norm": 13.418842315673828, "learning_rate": 1.4211551724137932e-06, "loss": 0.0895, "step": 217675 }, { "epoch": 2.14, "grad_norm": 4.289250373840332, "learning_rate": 1.4207241379310347e-06, "loss": 0.1966, "step": 217700 }, { "epoch": 2.14, "grad_norm": 9.619125366210938, "learning_rate": 1.420293103448276e-06, "loss": 0.0773, "step": 217725 }, { "epoch": 2.14, "grad_norm": 5.490204334259033, "learning_rate": 1.4198620689655174e-06, "loss": 0.1969, "step": 217750 }, { "epoch": 2.14, "grad_norm": 6.320809364318848, "learning_rate": 1.4194310344827588e-06, "loss": 0.0823, "step": 217775 }, { "epoch": 2.14, "grad_norm": 5.745292663574219, "learning_rate": 1.419e-06, "loss": 0.1859, "step": 217800 }, { "epoch": 2.14, "grad_norm": 4.940600395202637, "learning_rate": 1.4185689655172416e-06, "loss": 0.0911, "step": 217825 }, { "epoch": 2.14, "grad_norm": 3.7712650299072266, "learning_rate": 1.4181379310344828e-06, "loss": 0.2247, "step": 217850 }, { "epoch": 2.14, "grad_norm": 6.970971584320068, "learning_rate": 1.4177068965517243e-06, "loss": 0.1022, "step": 217875 }, { "epoch": 2.14, "grad_norm": 3.4505975246429443, "learning_rate": 1.4172758620689655e-06, "loss": 0.2148, "step": 217900 }, { "epoch": 2.14, "grad_norm": 11.283238410949707, "learning_rate": 1.4168448275862072e-06, "loss": 0.1227, "step": 217925 }, { "epoch": 2.14, "grad_norm": 3.4622199535369873, "learning_rate": 1.4164137931034482e-06, "loss": 0.2295, "step": 217950 }, { "epoch": 2.14, "grad_norm": 6.677395820617676, "learning_rate": 1.4159827586206899e-06, "loss": 0.0766, "step": 217975 }, { "epoch": 2.14, "grad_norm": 4.179995059967041, "learning_rate": 1.4155517241379311e-06, "loss": 0.2332, "step": 218000 }, { "epoch": 2.14, "grad_norm": 16.117887496948242, "learning_rate": 1.4151206896551726e-06, "loss": 0.1038, "step": 218025 }, { "epoch": 2.14, "grad_norm": 4.634519577026367, "learning_rate": 1.414689655172414e-06, "loss": 0.2142, "step": 218050 }, { "epoch": 2.14, "grad_norm": 2.187753438949585, "learning_rate": 1.4142586206896553e-06, "loss": 0.0785, "step": 218075 }, { "epoch": 2.15, "grad_norm": 2.4907801151275635, "learning_rate": 1.4138275862068967e-06, "loss": 0.2075, "step": 218100 }, { "epoch": 2.15, "grad_norm": 5.708163738250732, "learning_rate": 1.413396551724138e-06, "loss": 0.1011, "step": 218125 }, { "epoch": 2.15, "grad_norm": 4.218257904052734, "learning_rate": 1.4129655172413794e-06, "loss": 0.1583, "step": 218150 }, { "epoch": 2.15, "grad_norm": 11.270764350891113, "learning_rate": 1.4125344827586207e-06, "loss": 0.0868, "step": 218175 }, { "epoch": 2.15, "grad_norm": 3.54729962348938, "learning_rate": 1.4121034482758621e-06, "loss": 0.1761, "step": 218200 }, { "epoch": 2.15, "grad_norm": 6.670653820037842, "learning_rate": 1.4116724137931034e-06, "loss": 0.0894, "step": 218225 }, { "epoch": 2.15, "grad_norm": 4.466283798217773, "learning_rate": 1.411241379310345e-06, "loss": 0.1929, "step": 218250 }, { "epoch": 2.15, "grad_norm": 6.52210807800293, "learning_rate": 1.4108103448275865e-06, "loss": 0.0849, "step": 218275 }, { "epoch": 2.15, "grad_norm": 4.089939594268799, "learning_rate": 1.4103793103448277e-06, "loss": 0.2312, "step": 218300 }, { "epoch": 2.15, "grad_norm": 5.862168788909912, "learning_rate": 1.4099482758620692e-06, "loss": 0.0924, "step": 218325 }, { "epoch": 2.15, "grad_norm": 3.859593629837036, "learning_rate": 1.4095172413793104e-06, "loss": 0.1932, "step": 218350 }, { "epoch": 2.15, "grad_norm": 12.893254280090332, "learning_rate": 1.409086206896552e-06, "loss": 0.1075, "step": 218375 }, { "epoch": 2.15, "grad_norm": 4.294954299926758, "learning_rate": 1.4086551724137931e-06, "loss": 0.1963, "step": 218400 }, { "epoch": 2.15, "grad_norm": 6.228566646575928, "learning_rate": 1.4082241379310346e-06, "loss": 0.0692, "step": 218425 }, { "epoch": 2.15, "grad_norm": 2.7218141555786133, "learning_rate": 1.4077931034482758e-06, "loss": 0.22, "step": 218450 }, { "epoch": 2.15, "grad_norm": 3.9534027576446533, "learning_rate": 1.4073620689655173e-06, "loss": 0.0834, "step": 218475 }, { "epoch": 2.15, "grad_norm": 4.194166660308838, "learning_rate": 1.4069310344827588e-06, "loss": 0.192, "step": 218500 }, { "epoch": 2.15, "grad_norm": 3.045855760574341, "learning_rate": 1.4065e-06, "loss": 0.0765, "step": 218525 }, { "epoch": 2.15, "grad_norm": 5.476314544677734, "learning_rate": 1.4060689655172417e-06, "loss": 0.2124, "step": 218550 }, { "epoch": 2.15, "grad_norm": 8.327701568603516, "learning_rate": 1.405637931034483e-06, "loss": 0.0849, "step": 218575 }, { "epoch": 2.15, "grad_norm": 2.9956047534942627, "learning_rate": 1.4052068965517244e-06, "loss": 0.1789, "step": 218600 }, { "epoch": 2.15, "grad_norm": 5.83494758605957, "learning_rate": 1.4047758620689656e-06, "loss": 0.0876, "step": 218625 }, { "epoch": 2.15, "grad_norm": 3.6706833839416504, "learning_rate": 1.404344827586207e-06, "loss": 0.192, "step": 218650 }, { "epoch": 2.15, "grad_norm": 3.7256250381469727, "learning_rate": 1.4039137931034483e-06, "loss": 0.0946, "step": 218675 }, { "epoch": 2.15, "grad_norm": 3.517543077468872, "learning_rate": 1.4034827586206898e-06, "loss": 0.1907, "step": 218700 }, { "epoch": 2.15, "grad_norm": 9.860156059265137, "learning_rate": 1.4030517241379312e-06, "loss": 0.0936, "step": 218725 }, { "epoch": 2.15, "grad_norm": 4.5273823738098145, "learning_rate": 1.4026206896551725e-06, "loss": 0.1713, "step": 218750 }, { "epoch": 2.15, "grad_norm": 11.348947525024414, "learning_rate": 1.402189655172414e-06, "loss": 0.0869, "step": 218775 }, { "epoch": 2.15, "grad_norm": 4.647448539733887, "learning_rate": 1.4017586206896552e-06, "loss": 0.225, "step": 218800 }, { "epoch": 2.15, "grad_norm": 6.94767951965332, "learning_rate": 1.4013275862068968e-06, "loss": 0.0853, "step": 218825 }, { "epoch": 2.15, "grad_norm": 4.439239501953125, "learning_rate": 1.4008965517241379e-06, "loss": 0.2057, "step": 218850 }, { "epoch": 2.15, "grad_norm": 12.657708168029785, "learning_rate": 1.4004655172413795e-06, "loss": 0.0911, "step": 218875 }, { "epoch": 2.15, "grad_norm": 3.2838294506073, "learning_rate": 1.4000344827586208e-06, "loss": 0.21, "step": 218900 }, { "epoch": 2.15, "grad_norm": 3.910738468170166, "learning_rate": 1.3996034482758622e-06, "loss": 0.1039, "step": 218925 }, { "epoch": 2.15, "grad_norm": 4.177953243255615, "learning_rate": 1.3991724137931037e-06, "loss": 0.2312, "step": 218950 }, { "epoch": 2.15, "grad_norm": 4.524887561798096, "learning_rate": 1.398741379310345e-06, "loss": 0.087, "step": 218975 }, { "epoch": 2.15, "grad_norm": 4.10107946395874, "learning_rate": 1.3983103448275864e-06, "loss": 0.1967, "step": 219000 }, { "epoch": 2.15, "grad_norm": 7.143749713897705, "learning_rate": 1.3978793103448276e-06, "loss": 0.0811, "step": 219025 }, { "epoch": 2.15, "grad_norm": 5.0153117179870605, "learning_rate": 1.397448275862069e-06, "loss": 0.2202, "step": 219050 }, { "epoch": 2.15, "grad_norm": 12.49770450592041, "learning_rate": 1.3970172413793103e-06, "loss": 0.0809, "step": 219075 }, { "epoch": 2.15, "grad_norm": 4.432901382446289, "learning_rate": 1.3965862068965518e-06, "loss": 0.2071, "step": 219100 }, { "epoch": 2.16, "grad_norm": 13.981754302978516, "learning_rate": 1.396155172413793e-06, "loss": 0.1027, "step": 219125 }, { "epoch": 2.16, "grad_norm": 3.6827380657196045, "learning_rate": 1.3957241379310347e-06, "loss": 0.2094, "step": 219150 }, { "epoch": 2.16, "grad_norm": 7.533679485321045, "learning_rate": 1.3952931034482758e-06, "loss": 0.0793, "step": 219175 }, { "epoch": 2.16, "grad_norm": 6.582676410675049, "learning_rate": 1.3948620689655174e-06, "loss": 0.2038, "step": 219200 }, { "epoch": 2.16, "grad_norm": 5.175748825073242, "learning_rate": 1.3944310344827589e-06, "loss": 0.0675, "step": 219225 }, { "epoch": 2.16, "grad_norm": 4.206204414367676, "learning_rate": 1.3940000000000001e-06, "loss": 0.1823, "step": 219250 }, { "epoch": 2.16, "grad_norm": 5.838457107543945, "learning_rate": 1.3935689655172416e-06, "loss": 0.0787, "step": 219275 }, { "epoch": 2.16, "grad_norm": 4.092706680297852, "learning_rate": 1.3931379310344828e-06, "loss": 0.1629, "step": 219300 }, { "epoch": 2.16, "grad_norm": 11.319330215454102, "learning_rate": 1.3927068965517243e-06, "loss": 0.082, "step": 219325 }, { "epoch": 2.16, "grad_norm": 4.11406135559082, "learning_rate": 1.3922758620689655e-06, "loss": 0.2108, "step": 219350 }, { "epoch": 2.16, "grad_norm": 3.1126348972320557, "learning_rate": 1.391844827586207e-06, "loss": 0.0697, "step": 219375 }, { "epoch": 2.16, "grad_norm": 4.771113872528076, "learning_rate": 1.3914137931034482e-06, "loss": 0.1934, "step": 219400 }, { "epoch": 2.16, "grad_norm": 9.116281509399414, "learning_rate": 1.3909827586206897e-06, "loss": 0.1058, "step": 219425 }, { "epoch": 2.16, "grad_norm": 2.7626123428344727, "learning_rate": 1.3905517241379313e-06, "loss": 0.1977, "step": 219450 }, { "epoch": 2.16, "grad_norm": 5.25717306137085, "learning_rate": 1.3901206896551726e-06, "loss": 0.0788, "step": 219475 }, { "epoch": 2.16, "grad_norm": 4.982656478881836, "learning_rate": 1.389689655172414e-06, "loss": 0.1974, "step": 219500 }, { "epoch": 2.16, "grad_norm": 12.37270736694336, "learning_rate": 1.3892586206896553e-06, "loss": 0.1002, "step": 219525 }, { "epoch": 2.16, "grad_norm": 4.837098121643066, "learning_rate": 1.3888275862068968e-06, "loss": 0.2116, "step": 219550 }, { "epoch": 2.16, "grad_norm": 9.964102745056152, "learning_rate": 1.388396551724138e-06, "loss": 0.0897, "step": 219575 }, { "epoch": 2.16, "grad_norm": 3.947274923324585, "learning_rate": 1.3879655172413795e-06, "loss": 0.1924, "step": 219600 }, { "epoch": 2.16, "grad_norm": 6.200960636138916, "learning_rate": 1.3875344827586207e-06, "loss": 0.0773, "step": 219625 }, { "epoch": 2.16, "grad_norm": 5.5285444259643555, "learning_rate": 1.3871206896551726e-06, "loss": 0.164, "step": 219650 }, { "epoch": 2.16, "grad_norm": 8.36413860321045, "learning_rate": 1.3866896551724138e-06, "loss": 0.0762, "step": 219675 }, { "epoch": 2.16, "grad_norm": 3.7395331859588623, "learning_rate": 1.3862586206896553e-06, "loss": 0.2172, "step": 219700 }, { "epoch": 2.16, "grad_norm": 13.637110710144043, "learning_rate": 1.3858275862068967e-06, "loss": 0.1051, "step": 219725 }, { "epoch": 2.16, "grad_norm": 4.756360054016113, "learning_rate": 1.385396551724138e-06, "loss": 0.2138, "step": 219750 }, { "epoch": 2.16, "grad_norm": 4.259678840637207, "learning_rate": 1.3849655172413794e-06, "loss": 0.0768, "step": 219775 }, { "epoch": 2.16, "grad_norm": 4.663431167602539, "learning_rate": 1.3845344827586207e-06, "loss": 0.1828, "step": 219800 }, { "epoch": 2.16, "grad_norm": 5.106263160705566, "learning_rate": 1.3841034482758621e-06, "loss": 0.0905, "step": 219825 }, { "epoch": 2.16, "grad_norm": 28.957731246948242, "learning_rate": 1.3836724137931034e-06, "loss": 0.1877, "step": 219850 }, { "epoch": 2.16, "grad_norm": 13.570568084716797, "learning_rate": 1.383241379310345e-06, "loss": 0.123, "step": 219875 }, { "epoch": 2.16, "grad_norm": 3.996798276901245, "learning_rate": 1.3828103448275863e-06, "loss": 0.1986, "step": 219900 }, { "epoch": 2.16, "grad_norm": 9.120245933532715, "learning_rate": 1.3823793103448278e-06, "loss": 0.0816, "step": 219925 }, { "epoch": 2.16, "grad_norm": 4.22446346282959, "learning_rate": 1.3819482758620692e-06, "loss": 0.2402, "step": 219950 }, { "epoch": 2.16, "grad_norm": 6.4786176681518555, "learning_rate": 1.3815172413793105e-06, "loss": 0.0946, "step": 219975 }, { "epoch": 2.16, "grad_norm": 5.400475978851318, "learning_rate": 1.381086206896552e-06, "loss": 0.1918, "step": 220000 }, { "epoch": 2.16, "eval_loss": 0.5914686322212219, "eval_runtime": 5725.0865, "eval_samples_per_second": 1.654, "eval_steps_per_second": 0.207, "eval_wer": 0.11894032060773836, "step": 220000 }, { "epoch": 2.16, "grad_norm": 9.210982322692871, "learning_rate": 1.3806551724137932e-06, "loss": 0.1305, "step": 220025 }, { "epoch": 2.16, "grad_norm": 5.772472381591797, "learning_rate": 1.3802241379310346e-06, "loss": 0.222, "step": 220050 }, { "epoch": 2.16, "grad_norm": 5.0596232414245605, "learning_rate": 1.3797931034482759e-06, "loss": 0.1057, "step": 220075 }, { "epoch": 2.16, "grad_norm": 4.7172465324401855, "learning_rate": 1.3793620689655173e-06, "loss": 0.2124, "step": 220100 }, { "epoch": 2.17, "grad_norm": 10.066768646240234, "learning_rate": 1.3789310344827586e-06, "loss": 0.0944, "step": 220125 }, { "epoch": 2.17, "grad_norm": 4.8655195236206055, "learning_rate": 1.3785e-06, "loss": 0.2114, "step": 220150 }, { "epoch": 2.17, "grad_norm": 9.13447380065918, "learning_rate": 1.3780689655172417e-06, "loss": 0.0922, "step": 220175 }, { "epoch": 2.17, "grad_norm": 3.0129430294036865, "learning_rate": 1.377637931034483e-06, "loss": 0.1723, "step": 220200 }, { "epoch": 2.17, "grad_norm": 10.299650192260742, "learning_rate": 1.3772068965517244e-06, "loss": 0.0853, "step": 220225 }, { "epoch": 2.17, "grad_norm": 4.434137344360352, "learning_rate": 1.3767758620689656e-06, "loss": 0.2424, "step": 220250 }, { "epoch": 2.17, "grad_norm": 2.015036106109619, "learning_rate": 1.376344827586207e-06, "loss": 0.084, "step": 220275 }, { "epoch": 2.17, "grad_norm": 4.2357497215271, "learning_rate": 1.3759137931034483e-06, "loss": 0.1957, "step": 220300 }, { "epoch": 2.17, "grad_norm": 7.634780406951904, "learning_rate": 1.3754827586206898e-06, "loss": 0.0895, "step": 220325 }, { "epoch": 2.17, "grad_norm": 4.738967418670654, "learning_rate": 1.375051724137931e-06, "loss": 0.216, "step": 220350 }, { "epoch": 2.17, "grad_norm": 6.517153263092041, "learning_rate": 1.3746206896551725e-06, "loss": 0.1019, "step": 220375 }, { "epoch": 2.17, "grad_norm": 3.4714102745056152, "learning_rate": 1.374189655172414e-06, "loss": 0.2287, "step": 220400 }, { "epoch": 2.17, "grad_norm": 1.9528312683105469, "learning_rate": 1.3737586206896552e-06, "loss": 0.1096, "step": 220425 }, { "epoch": 2.17, "grad_norm": 6.741857528686523, "learning_rate": 1.3733275862068969e-06, "loss": 0.187, "step": 220450 }, { "epoch": 2.17, "grad_norm": 6.683516502380371, "learning_rate": 1.3728965517241379e-06, "loss": 0.1177, "step": 220475 }, { "epoch": 2.17, "grad_norm": 3.8885092735290527, "learning_rate": 1.3724655172413796e-06, "loss": 0.2094, "step": 220500 }, { "epoch": 2.17, "grad_norm": 2.6492183208465576, "learning_rate": 1.3720344827586208e-06, "loss": 0.0925, "step": 220525 }, { "epoch": 2.17, "grad_norm": 4.623905658721924, "learning_rate": 1.3716034482758623e-06, "loss": 0.2112, "step": 220550 }, { "epoch": 2.17, "grad_norm": 6.322756290435791, "learning_rate": 1.3711724137931035e-06, "loss": 0.0935, "step": 220575 }, { "epoch": 2.17, "grad_norm": 5.0827765464782715, "learning_rate": 1.370741379310345e-06, "loss": 0.1605, "step": 220600 }, { "epoch": 2.17, "grad_norm": 10.925559997558594, "learning_rate": 1.3703103448275862e-06, "loss": 0.0937, "step": 220625 }, { "epoch": 2.17, "grad_norm": 3.394132375717163, "learning_rate": 1.3698793103448277e-06, "loss": 0.1991, "step": 220650 }, { "epoch": 2.17, "grad_norm": 12.60792350769043, "learning_rate": 1.3694482758620691e-06, "loss": 0.0748, "step": 220675 }, { "epoch": 2.17, "grad_norm": 4.061805248260498, "learning_rate": 1.3690172413793104e-06, "loss": 0.1966, "step": 220700 }, { "epoch": 2.17, "grad_norm": 8.085787773132324, "learning_rate": 1.3685862068965518e-06, "loss": 0.1297, "step": 220725 }, { "epoch": 2.17, "grad_norm": 4.887253761291504, "learning_rate": 1.368155172413793e-06, "loss": 0.2512, "step": 220750 }, { "epoch": 2.17, "grad_norm": 13.109362602233887, "learning_rate": 1.3677241379310347e-06, "loss": 0.1077, "step": 220775 }, { "epoch": 2.17, "grad_norm": 4.304075717926025, "learning_rate": 1.3672931034482758e-06, "loss": 0.2149, "step": 220800 }, { "epoch": 2.17, "grad_norm": 15.118487358093262, "learning_rate": 1.3668620689655174e-06, "loss": 0.1035, "step": 220825 }, { "epoch": 2.17, "grad_norm": 4.328031063079834, "learning_rate": 1.3664310344827587e-06, "loss": 0.2079, "step": 220850 }, { "epoch": 2.17, "grad_norm": 3.1890923976898193, "learning_rate": 1.3660000000000001e-06, "loss": 0.0873, "step": 220875 }, { "epoch": 2.17, "grad_norm": 3.990663766860962, "learning_rate": 1.3655689655172416e-06, "loss": 0.195, "step": 220900 }, { "epoch": 2.17, "grad_norm": 7.312033653259277, "learning_rate": 1.3651379310344828e-06, "loss": 0.0736, "step": 220925 }, { "epoch": 2.17, "grad_norm": 6.073853015899658, "learning_rate": 1.3647068965517243e-06, "loss": 0.2219, "step": 220950 }, { "epoch": 2.17, "grad_norm": 2.559741735458374, "learning_rate": 1.3642758620689655e-06, "loss": 0.1125, "step": 220975 }, { "epoch": 2.17, "grad_norm": 2.7472176551818848, "learning_rate": 1.363844827586207e-06, "loss": 0.2105, "step": 221000 }, { "epoch": 2.17, "grad_norm": 5.991394996643066, "learning_rate": 1.3634137931034482e-06, "loss": 0.0925, "step": 221025 }, { "epoch": 2.17, "grad_norm": 4.5656938552856445, "learning_rate": 1.3629827586206897e-06, "loss": 0.1879, "step": 221050 }, { "epoch": 2.17, "grad_norm": 8.885279655456543, "learning_rate": 1.362551724137931e-06, "loss": 0.0944, "step": 221075 }, { "epoch": 2.17, "grad_norm": 4.3328142166137695, "learning_rate": 1.3621206896551726e-06, "loss": 0.2331, "step": 221100 }, { "epoch": 2.17, "grad_norm": 8.576812744140625, "learning_rate": 1.361689655172414e-06, "loss": 0.1039, "step": 221125 }, { "epoch": 2.18, "grad_norm": 3.93200421333313, "learning_rate": 1.3612586206896553e-06, "loss": 0.2095, "step": 221150 }, { "epoch": 2.18, "grad_norm": 10.335476875305176, "learning_rate": 1.3608275862068968e-06, "loss": 0.0863, "step": 221175 }, { "epoch": 2.18, "grad_norm": 6.094019412994385, "learning_rate": 1.360396551724138e-06, "loss": 0.2371, "step": 221200 }, { "epoch": 2.18, "grad_norm": 8.61821460723877, "learning_rate": 1.3599655172413795e-06, "loss": 0.0854, "step": 221225 }, { "epoch": 2.18, "grad_norm": 3.7494945526123047, "learning_rate": 1.3595344827586207e-06, "loss": 0.2008, "step": 221250 }, { "epoch": 2.18, "grad_norm": 6.565432548522949, "learning_rate": 1.3591034482758622e-06, "loss": 0.1044, "step": 221275 }, { "epoch": 2.18, "grad_norm": 3.4909613132476807, "learning_rate": 1.3586724137931034e-06, "loss": 0.1818, "step": 221300 }, { "epoch": 2.18, "grad_norm": 8.39028263092041, "learning_rate": 1.3582413793103449e-06, "loss": 0.1073, "step": 221325 }, { "epoch": 2.18, "grad_norm": 4.8505706787109375, "learning_rate": 1.3578103448275865e-06, "loss": 0.1804, "step": 221350 }, { "epoch": 2.18, "grad_norm": 8.393173217773438, "learning_rate": 1.3573793103448276e-06, "loss": 0.088, "step": 221375 }, { "epoch": 2.18, "grad_norm": 4.815698146820068, "learning_rate": 1.3569482758620692e-06, "loss": 0.2147, "step": 221400 }, { "epoch": 2.18, "grad_norm": 10.887417793273926, "learning_rate": 1.3565172413793105e-06, "loss": 0.0951, "step": 221425 }, { "epoch": 2.18, "grad_norm": 6.090976238250732, "learning_rate": 1.356086206896552e-06, "loss": 0.2519, "step": 221450 }, { "epoch": 2.18, "grad_norm": 7.0242085456848145, "learning_rate": 1.3556551724137932e-06, "loss": 0.076, "step": 221475 }, { "epoch": 2.18, "grad_norm": 5.070969104766846, "learning_rate": 1.3552241379310346e-06, "loss": 0.205, "step": 221500 }, { "epoch": 2.18, "grad_norm": 4.323213577270508, "learning_rate": 1.3547931034482759e-06, "loss": 0.0949, "step": 221525 }, { "epoch": 2.18, "grad_norm": 4.814250946044922, "learning_rate": 1.3543620689655173e-06, "loss": 0.1661, "step": 221550 }, { "epoch": 2.18, "grad_norm": 10.955415725708008, "learning_rate": 1.3539310344827588e-06, "loss": 0.0847, "step": 221575 }, { "epoch": 2.18, "grad_norm": 4.626889705657959, "learning_rate": 1.3535e-06, "loss": 0.2116, "step": 221600 }, { "epoch": 2.18, "grad_norm": 9.437483787536621, "learning_rate": 1.3530689655172415e-06, "loss": 0.0942, "step": 221625 }, { "epoch": 2.18, "grad_norm": 5.685725688934326, "learning_rate": 1.3526379310344827e-06, "loss": 0.2477, "step": 221650 }, { "epoch": 2.18, "grad_norm": 6.10081148147583, "learning_rate": 1.3522068965517244e-06, "loss": 0.0894, "step": 221675 }, { "epoch": 2.18, "grad_norm": 4.760306358337402, "learning_rate": 1.3517758620689654e-06, "loss": 0.1931, "step": 221700 }, { "epoch": 2.18, "grad_norm": 10.358682632446289, "learning_rate": 1.3513448275862071e-06, "loss": 0.0923, "step": 221725 }, { "epoch": 2.18, "grad_norm": 3.944923162460327, "learning_rate": 1.3509137931034484e-06, "loss": 0.2496, "step": 221750 }, { "epoch": 2.18, "grad_norm": 9.160606384277344, "learning_rate": 1.3504827586206898e-06, "loss": 0.114, "step": 221775 }, { "epoch": 2.18, "grad_norm": 4.202480792999268, "learning_rate": 1.350051724137931e-06, "loss": 0.2121, "step": 221800 }, { "epoch": 2.18, "grad_norm": 10.052802085876465, "learning_rate": 1.3496206896551725e-06, "loss": 0.079, "step": 221825 }, { "epoch": 2.18, "grad_norm": 7.779782295227051, "learning_rate": 1.349189655172414e-06, "loss": 0.2228, "step": 221850 }, { "epoch": 2.18, "grad_norm": 9.152196884155273, "learning_rate": 1.3487586206896552e-06, "loss": 0.0974, "step": 221875 }, { "epoch": 2.18, "grad_norm": 7.844732761383057, "learning_rate": 1.348344827586207e-06, "loss": 0.2043, "step": 221900 }, { "epoch": 2.18, "grad_norm": 5.088404178619385, "learning_rate": 1.3479137931034483e-06, "loss": 0.093, "step": 221925 }, { "epoch": 2.18, "grad_norm": 6.183593273162842, "learning_rate": 1.3474827586206898e-06, "loss": 0.1834, "step": 221950 }, { "epoch": 2.18, "grad_norm": 8.552050590515137, "learning_rate": 1.347051724137931e-06, "loss": 0.103, "step": 221975 }, { "epoch": 2.18, "grad_norm": 4.87272310256958, "learning_rate": 1.3466206896551725e-06, "loss": 0.2125, "step": 222000 }, { "epoch": 2.18, "grad_norm": 10.201583862304688, "learning_rate": 1.3461896551724137e-06, "loss": 0.0946, "step": 222025 }, { "epoch": 2.18, "grad_norm": 5.5497870445251465, "learning_rate": 1.3457586206896552e-06, "loss": 0.2049, "step": 222050 }, { "epoch": 2.18, "grad_norm": 8.314393043518066, "learning_rate": 1.3453275862068969e-06, "loss": 0.0949, "step": 222075 }, { "epoch": 2.18, "grad_norm": 4.036456108093262, "learning_rate": 1.3448965517241381e-06, "loss": 0.1699, "step": 222100 }, { "epoch": 2.18, "grad_norm": 9.031561851501465, "learning_rate": 1.3444655172413796e-06, "loss": 0.0909, "step": 222125 }, { "epoch": 2.18, "grad_norm": 4.952576160430908, "learning_rate": 1.3440344827586208e-06, "loss": 0.1935, "step": 222150 }, { "epoch": 2.19, "grad_norm": 3.4360404014587402, "learning_rate": 1.3436034482758623e-06, "loss": 0.08, "step": 222175 }, { "epoch": 2.19, "grad_norm": 4.596909523010254, "learning_rate": 1.3431724137931035e-06, "loss": 0.1671, "step": 222200 }, { "epoch": 2.19, "grad_norm": 7.911813735961914, "learning_rate": 1.342741379310345e-06, "loss": 0.0961, "step": 222225 }, { "epoch": 2.19, "grad_norm": 4.7089924812316895, "learning_rate": 1.3423103448275862e-06, "loss": 0.1895, "step": 222250 }, { "epoch": 2.19, "grad_norm": 3.424100637435913, "learning_rate": 1.3418793103448277e-06, "loss": 0.0665, "step": 222275 }, { "epoch": 2.19, "grad_norm": 5.467520236968994, "learning_rate": 1.341448275862069e-06, "loss": 0.2082, "step": 222300 }, { "epoch": 2.19, "grad_norm": 6.241922378540039, "learning_rate": 1.3410172413793104e-06, "loss": 0.0711, "step": 222325 }, { "epoch": 2.19, "grad_norm": 5.312097072601318, "learning_rate": 1.340586206896552e-06, "loss": 0.2291, "step": 222350 }, { "epoch": 2.19, "grad_norm": 10.87580394744873, "learning_rate": 1.340155172413793e-06, "loss": 0.0942, "step": 222375 }, { "epoch": 2.19, "grad_norm": 3.2553741931915283, "learning_rate": 1.3397241379310347e-06, "loss": 0.2087, "step": 222400 }, { "epoch": 2.19, "grad_norm": 9.391987800598145, "learning_rate": 1.339293103448276e-06, "loss": 0.0865, "step": 222425 }, { "epoch": 2.19, "grad_norm": 5.360111713409424, "learning_rate": 1.3388620689655174e-06, "loss": 0.1613, "step": 222450 }, { "epoch": 2.19, "grad_norm": 11.619894981384277, "learning_rate": 1.3384310344827587e-06, "loss": 0.102, "step": 222475 }, { "epoch": 2.19, "grad_norm": 3.325693130493164, "learning_rate": 1.3380000000000001e-06, "loss": 0.1986, "step": 222500 }, { "epoch": 2.19, "grad_norm": 4.388533115386963, "learning_rate": 1.3375689655172414e-06, "loss": 0.0778, "step": 222525 }, { "epoch": 2.19, "grad_norm": 4.84779691696167, "learning_rate": 1.3371379310344829e-06, "loss": 0.2448, "step": 222550 }, { "epoch": 2.19, "grad_norm": 14.246817588806152, "learning_rate": 1.3367068965517243e-06, "loss": 0.1033, "step": 222575 }, { "epoch": 2.19, "grad_norm": 5.288813591003418, "learning_rate": 1.3362758620689656e-06, "loss": 0.1754, "step": 222600 }, { "epoch": 2.19, "grad_norm": 5.758969783782959, "learning_rate": 1.335844827586207e-06, "loss": 0.0903, "step": 222625 }, { "epoch": 2.19, "grad_norm": 3.12351655960083, "learning_rate": 1.3354137931034483e-06, "loss": 0.1923, "step": 222650 }, { "epoch": 2.19, "grad_norm": 11.776300430297852, "learning_rate": 1.33498275862069e-06, "loss": 0.0866, "step": 222675 }, { "epoch": 2.19, "grad_norm": 4.502050876617432, "learning_rate": 1.334551724137931e-06, "loss": 0.2079, "step": 222700 }, { "epoch": 2.19, "grad_norm": 11.35539722442627, "learning_rate": 1.3341206896551726e-06, "loss": 0.1, "step": 222725 }, { "epoch": 2.19, "grad_norm": 3.1959211826324463, "learning_rate": 1.3336896551724139e-06, "loss": 0.173, "step": 222750 }, { "epoch": 2.19, "grad_norm": 5.235823631286621, "learning_rate": 1.3332586206896553e-06, "loss": 0.0946, "step": 222775 }, { "epoch": 2.19, "grad_norm": 4.613935470581055, "learning_rate": 1.3328275862068968e-06, "loss": 0.1819, "step": 222800 }, { "epoch": 2.19, "grad_norm": 6.33218240737915, "learning_rate": 1.332396551724138e-06, "loss": 0.1046, "step": 222825 }, { "epoch": 2.19, "grad_norm": 4.381547451019287, "learning_rate": 1.3319655172413795e-06, "loss": 0.2148, "step": 222850 }, { "epoch": 2.19, "grad_norm": 7.040875434875488, "learning_rate": 1.3315344827586207e-06, "loss": 0.0764, "step": 222875 }, { "epoch": 2.19, "grad_norm": 4.263317584991455, "learning_rate": 1.3311034482758622e-06, "loss": 0.1816, "step": 222900 }, { "epoch": 2.19, "grad_norm": 4.902429580688477, "learning_rate": 1.3306724137931034e-06, "loss": 0.0734, "step": 222925 }, { "epoch": 2.19, "grad_norm": 3.758016347885132, "learning_rate": 1.3302413793103449e-06, "loss": 0.2265, "step": 222950 }, { "epoch": 2.19, "grad_norm": 5.724023342132568, "learning_rate": 1.3298103448275861e-06, "loss": 0.0931, "step": 222975 }, { "epoch": 2.19, "grad_norm": 6.15338659286499, "learning_rate": 1.3293793103448278e-06, "loss": 0.172, "step": 223000 }, { "epoch": 2.19, "grad_norm": 10.140028953552246, "learning_rate": 1.3289482758620693e-06, "loss": 0.0945, "step": 223025 }, { "epoch": 2.19, "grad_norm": 3.7166645526885986, "learning_rate": 1.3285172413793105e-06, "loss": 0.1928, "step": 223050 }, { "epoch": 2.19, "grad_norm": 7.550856113433838, "learning_rate": 1.328086206896552e-06, "loss": 0.1044, "step": 223075 }, { "epoch": 2.19, "grad_norm": 4.530411243438721, "learning_rate": 1.3276551724137932e-06, "loss": 0.2073, "step": 223100 }, { "epoch": 2.19, "grad_norm": 11.150483131408691, "learning_rate": 1.3272241379310347e-06, "loss": 0.1106, "step": 223125 }, { "epoch": 2.19, "grad_norm": 3.495652437210083, "learning_rate": 1.326793103448276e-06, "loss": 0.1935, "step": 223150 }, { "epoch": 2.2, "grad_norm": 7.537541389465332, "learning_rate": 1.3263620689655174e-06, "loss": 0.0744, "step": 223175 }, { "epoch": 2.2, "grad_norm": 3.859163761138916, "learning_rate": 1.3259310344827586e-06, "loss": 0.1941, "step": 223200 }, { "epoch": 2.2, "grad_norm": 7.352869510650635, "learning_rate": 1.3255e-06, "loss": 0.1009, "step": 223225 }, { "epoch": 2.2, "grad_norm": 4.0690131187438965, "learning_rate": 1.3250689655172417e-06, "loss": 0.1718, "step": 223250 }, { "epoch": 2.2, "grad_norm": 0.6928341388702393, "learning_rate": 1.3246379310344828e-06, "loss": 0.0812, "step": 223275 }, { "epoch": 2.2, "grad_norm": 6.472506999969482, "learning_rate": 1.3242068965517244e-06, "loss": 0.1682, "step": 223300 }, { "epoch": 2.2, "grad_norm": 7.795395851135254, "learning_rate": 1.3237758620689657e-06, "loss": 0.1047, "step": 223325 }, { "epoch": 2.2, "grad_norm": 5.571094989776611, "learning_rate": 1.3233448275862071e-06, "loss": 0.2792, "step": 223350 }, { "epoch": 2.2, "grad_norm": 13.405936241149902, "learning_rate": 1.3229137931034484e-06, "loss": 0.1061, "step": 223375 }, { "epoch": 2.2, "grad_norm": 6.673052787780762, "learning_rate": 1.3224827586206898e-06, "loss": 0.2237, "step": 223400 }, { "epoch": 2.2, "grad_norm": 9.956747055053711, "learning_rate": 1.322051724137931e-06, "loss": 0.0843, "step": 223425 }, { "epoch": 2.2, "grad_norm": 3.9377005100250244, "learning_rate": 1.3216206896551725e-06, "loss": 0.2036, "step": 223450 }, { "epoch": 2.2, "grad_norm": 4.655128479003906, "learning_rate": 1.3211896551724138e-06, "loss": 0.0821, "step": 223475 }, { "epoch": 2.2, "grad_norm": 5.415841579437256, "learning_rate": 1.3207586206896552e-06, "loss": 0.2071, "step": 223500 }, { "epoch": 2.2, "grad_norm": 12.20034122467041, "learning_rate": 1.3203275862068967e-06, "loss": 0.0976, "step": 223525 }, { "epoch": 2.2, "grad_norm": 3.7454395294189453, "learning_rate": 1.319896551724138e-06, "loss": 0.2109, "step": 223550 }, { "epoch": 2.2, "grad_norm": 6.346800327301025, "learning_rate": 1.3194655172413796e-06, "loss": 0.1033, "step": 223575 }, { "epoch": 2.2, "grad_norm": 3.464284896850586, "learning_rate": 1.3190344827586206e-06, "loss": 0.1746, "step": 223600 }, { "epoch": 2.2, "grad_norm": 3.5763161182403564, "learning_rate": 1.3186034482758623e-06, "loss": 0.1007, "step": 223625 }, { "epoch": 2.2, "grad_norm": 3.6304433345794678, "learning_rate": 1.3181724137931035e-06, "loss": 0.1864, "step": 223650 }, { "epoch": 2.2, "grad_norm": 7.7953619956970215, "learning_rate": 1.317741379310345e-06, "loss": 0.1038, "step": 223675 }, { "epoch": 2.2, "grad_norm": 3.3305442333221436, "learning_rate": 1.3173103448275862e-06, "loss": 0.2212, "step": 223700 }, { "epoch": 2.2, "grad_norm": 7.087863922119141, "learning_rate": 1.3168793103448277e-06, "loss": 0.0704, "step": 223725 }, { "epoch": 2.2, "grad_norm": 5.444832801818848, "learning_rate": 1.3164482758620692e-06, "loss": 0.2201, "step": 223750 }, { "epoch": 2.2, "grad_norm": 7.6996612548828125, "learning_rate": 1.3160172413793104e-06, "loss": 0.0673, "step": 223775 }, { "epoch": 2.2, "grad_norm": 4.112785339355469, "learning_rate": 1.3155862068965519e-06, "loss": 0.1829, "step": 223800 }, { "epoch": 2.2, "grad_norm": 6.250258445739746, "learning_rate": 1.3151551724137931e-06, "loss": 0.0877, "step": 223825 }, { "epoch": 2.2, "grad_norm": 3.8829445838928223, "learning_rate": 1.3147241379310346e-06, "loss": 0.1707, "step": 223850 }, { "epoch": 2.2, "grad_norm": 9.455552101135254, "learning_rate": 1.3142931034482758e-06, "loss": 0.0972, "step": 223875 }, { "epoch": 2.2, "grad_norm": 5.617674350738525, "learning_rate": 1.3138620689655175e-06, "loss": 0.1992, "step": 223900 }, { "epoch": 2.2, "grad_norm": 5.697497844696045, "learning_rate": 1.3134310344827585e-06, "loss": 0.0849, "step": 223925 }, { "epoch": 2.2, "grad_norm": 3.8515851497650146, "learning_rate": 1.3130000000000002e-06, "loss": 0.2288, "step": 223950 }, { "epoch": 2.2, "grad_norm": 8.890486717224121, "learning_rate": 1.3125689655172416e-06, "loss": 0.099, "step": 223975 }, { "epoch": 2.2, "grad_norm": 4.46003532409668, "learning_rate": 1.3121379310344829e-06, "loss": 0.2354, "step": 224000 }, { "epoch": 2.2, "grad_norm": 10.867656707763672, "learning_rate": 1.3117068965517243e-06, "loss": 0.1298, "step": 224025 }, { "epoch": 2.2, "grad_norm": 4.215760707855225, "learning_rate": 1.3112758620689656e-06, "loss": 0.1948, "step": 224050 }, { "epoch": 2.2, "grad_norm": 4.415144443511963, "learning_rate": 1.310844827586207e-06, "loss": 0.0635, "step": 224075 }, { "epoch": 2.2, "grad_norm": 4.729945659637451, "learning_rate": 1.3104137931034483e-06, "loss": 0.2433, "step": 224100 }, { "epoch": 2.2, "grad_norm": 6.997957706451416, "learning_rate": 1.3099827586206897e-06, "loss": 0.0667, "step": 224125 }, { "epoch": 2.2, "grad_norm": 4.412167072296143, "learning_rate": 1.309551724137931e-06, "loss": 0.2181, "step": 224150 }, { "epoch": 2.2, "grad_norm": 12.50572681427002, "learning_rate": 1.3091206896551724e-06, "loss": 0.0952, "step": 224175 }, { "epoch": 2.21, "grad_norm": 3.6598408222198486, "learning_rate": 1.3086896551724141e-06, "loss": 0.2271, "step": 224200 }, { "epoch": 2.21, "grad_norm": 6.926851749420166, "learning_rate": 1.3082586206896554e-06, "loss": 0.0907, "step": 224225 }, { "epoch": 2.21, "grad_norm": 4.307181358337402, "learning_rate": 1.3078275862068968e-06, "loss": 0.2044, "step": 224250 }, { "epoch": 2.21, "grad_norm": 10.229606628417969, "learning_rate": 1.307396551724138e-06, "loss": 0.0827, "step": 224275 }, { "epoch": 2.21, "grad_norm": 4.4730939865112305, "learning_rate": 1.3069655172413795e-06, "loss": 0.2165, "step": 224300 }, { "epoch": 2.21, "grad_norm": 4.076330184936523, "learning_rate": 1.3065344827586208e-06, "loss": 0.1, "step": 224325 }, { "epoch": 2.21, "grad_norm": 3.584109306335449, "learning_rate": 1.3061034482758622e-06, "loss": 0.2053, "step": 224350 }, { "epoch": 2.21, "grad_norm": 7.062405109405518, "learning_rate": 1.3056724137931035e-06, "loss": 0.0856, "step": 224375 }, { "epoch": 2.21, "grad_norm": 5.269510269165039, "learning_rate": 1.305241379310345e-06, "loss": 0.1842, "step": 224400 }, { "epoch": 2.21, "grad_norm": 7.083425521850586, "learning_rate": 1.3048103448275864e-06, "loss": 0.0792, "step": 224425 }, { "epoch": 2.21, "grad_norm": 3.5340654850006104, "learning_rate": 1.3043793103448276e-06, "loss": 0.1998, "step": 224450 }, { "epoch": 2.21, "grad_norm": 8.542539596557617, "learning_rate": 1.3039482758620693e-06, "loss": 0.0947, "step": 224475 }, { "epoch": 2.21, "grad_norm": 2.7878589630126953, "learning_rate": 1.3035344827586207e-06, "loss": 0.2155, "step": 224500 }, { "epoch": 2.21, "grad_norm": 6.392285346984863, "learning_rate": 1.3031034482758622e-06, "loss": 0.0857, "step": 224525 }, { "epoch": 2.21, "grad_norm": 3.628845691680908, "learning_rate": 1.3026724137931034e-06, "loss": 0.2199, "step": 224550 }, { "epoch": 2.21, "grad_norm": 7.426091194152832, "learning_rate": 1.302241379310345e-06, "loss": 0.0855, "step": 224575 }, { "epoch": 2.21, "grad_norm": 6.1496968269348145, "learning_rate": 1.3018103448275861e-06, "loss": 0.1975, "step": 224600 }, { "epoch": 2.21, "grad_norm": 6.445404052734375, "learning_rate": 1.3013793103448278e-06, "loss": 0.08, "step": 224625 }, { "epoch": 2.21, "grad_norm": 4.664968490600586, "learning_rate": 1.300948275862069e-06, "loss": 0.1815, "step": 224650 }, { "epoch": 2.21, "grad_norm": 6.690098285675049, "learning_rate": 1.3005172413793105e-06, "loss": 0.0936, "step": 224675 }, { "epoch": 2.21, "grad_norm": 3.8383655548095703, "learning_rate": 1.300086206896552e-06, "loss": 0.2261, "step": 224700 }, { "epoch": 2.21, "grad_norm": 8.268793106079102, "learning_rate": 1.2996551724137932e-06, "loss": 0.0875, "step": 224725 }, { "epoch": 2.21, "grad_norm": 4.463335037231445, "learning_rate": 1.2992241379310347e-06, "loss": 0.1683, "step": 224750 }, { "epoch": 2.21, "grad_norm": 4.24522590637207, "learning_rate": 1.298793103448276e-06, "loss": 0.0914, "step": 224775 }, { "epoch": 2.21, "grad_norm": 4.000400066375732, "learning_rate": 1.2983620689655174e-06, "loss": 0.2164, "step": 224800 }, { "epoch": 2.21, "grad_norm": 6.71848726272583, "learning_rate": 1.2979310344827586e-06, "loss": 0.0875, "step": 224825 }, { "epoch": 2.21, "grad_norm": 4.042677879333496, "learning_rate": 1.2975e-06, "loss": 0.181, "step": 224850 }, { "epoch": 2.21, "grad_norm": 6.345245838165283, "learning_rate": 1.2970689655172413e-06, "loss": 0.0746, "step": 224875 }, { "epoch": 2.21, "grad_norm": 4.304490566253662, "learning_rate": 1.2966379310344828e-06, "loss": 0.2239, "step": 224900 }, { "epoch": 2.21, "grad_norm": 6.357385635375977, "learning_rate": 1.2962068965517244e-06, "loss": 0.0909, "step": 224925 }, { "epoch": 2.21, "grad_norm": 7.39578914642334, "learning_rate": 1.2957758620689657e-06, "loss": 0.227, "step": 224950 }, { "epoch": 2.21, "grad_norm": 6.387125015258789, "learning_rate": 1.2953448275862071e-06, "loss": 0.0831, "step": 224975 }, { "epoch": 2.21, "grad_norm": 4.607662200927734, "learning_rate": 1.2949137931034484e-06, "loss": 0.2293, "step": 225000 }, { "epoch": 2.21, "grad_norm": 4.351215839385986, "learning_rate": 1.2945172413793105e-06, "loss": 0.1985, "step": 225025 }, { "epoch": 2.21, "grad_norm": 7.2699055671691895, "learning_rate": 1.2940862068965517e-06, "loss": 0.0923, "step": 225050 }, { "epoch": 2.21, "grad_norm": 5.248844623565674, "learning_rate": 1.2936551724137934e-06, "loss": 0.1971, "step": 225075 }, { "epoch": 2.21, "grad_norm": 12.276921272277832, "learning_rate": 1.2932241379310344e-06, "loss": 0.1278, "step": 225100 }, { "epoch": 2.21, "grad_norm": 3.8806211948394775, "learning_rate": 1.2928103448275863e-06, "loss": 0.1857, "step": 225125 }, { "epoch": 2.21, "grad_norm": 8.554393768310547, "learning_rate": 1.2923793103448276e-06, "loss": 0.093, "step": 225150 }, { "epoch": 2.21, "grad_norm": 3.882810115814209, "learning_rate": 1.291948275862069e-06, "loss": 0.2195, "step": 225175 }, { "epoch": 2.21, "grad_norm": 12.899988174438477, "learning_rate": 1.2915172413793103e-06, "loss": 0.1164, "step": 225200 }, { "epoch": 2.21, "grad_norm": 3.600672960281372, "learning_rate": 1.291086206896552e-06, "loss": 0.1643, "step": 225225 }, { "epoch": 2.21, "grad_norm": 13.896950721740723, "learning_rate": 1.2906551724137934e-06, "loss": 0.1194, "step": 225250 }, { "epoch": 2.21, "grad_norm": 4.262522220611572, "learning_rate": 1.2902241379310346e-06, "loss": 0.1326, "step": 225275 }, { "epoch": 2.22, "grad_norm": 21.945188522338867, "learning_rate": 1.289793103448276e-06, "loss": 0.115, "step": 225300 }, { "epoch": 2.22, "grad_norm": 5.162106513977051, "learning_rate": 1.2893620689655173e-06, "loss": 0.2024, "step": 225325 }, { "epoch": 2.22, "grad_norm": 9.59231185913086, "learning_rate": 1.2889310344827588e-06, "loss": 0.0902, "step": 225350 }, { "epoch": 2.22, "grad_norm": 5.8382792472839355, "learning_rate": 1.2885e-06, "loss": 0.1733, "step": 225375 }, { "epoch": 2.22, "grad_norm": 11.631332397460938, "learning_rate": 1.2880689655172415e-06, "loss": 0.0743, "step": 225400 }, { "epoch": 2.22, "grad_norm": 5.126560688018799, "learning_rate": 1.2876379310344827e-06, "loss": 0.2191, "step": 225425 }, { "epoch": 2.22, "grad_norm": 13.0121488571167, "learning_rate": 1.2872068965517242e-06, "loss": 0.09, "step": 225450 }, { "epoch": 2.22, "grad_norm": 2.857689619064331, "learning_rate": 1.2867758620689654e-06, "loss": 0.1473, "step": 225475 }, { "epoch": 2.22, "grad_norm": 9.167594909667969, "learning_rate": 1.2863448275862069e-06, "loss": 0.1106, "step": 225500 }, { "epoch": 2.22, "grad_norm": 3.918031692504883, "learning_rate": 1.2859137931034486e-06, "loss": 0.1468, "step": 225525 }, { "epoch": 2.22, "grad_norm": 11.113051414489746, "learning_rate": 1.2854827586206898e-06, "loss": 0.116, "step": 225550 }, { "epoch": 2.22, "grad_norm": 5.624621868133545, "learning_rate": 1.2850517241379313e-06, "loss": 0.1694, "step": 225575 }, { "epoch": 2.22, "grad_norm": 12.830183029174805, "learning_rate": 1.2846206896551725e-06, "loss": 0.1034, "step": 225600 }, { "epoch": 2.22, "grad_norm": 7.726714134216309, "learning_rate": 1.284189655172414e-06, "loss": 0.188, "step": 225625 }, { "epoch": 2.22, "grad_norm": 9.966958999633789, "learning_rate": 1.2837586206896552e-06, "loss": 0.0954, "step": 225650 }, { "epoch": 2.22, "grad_norm": 7.91231632232666, "learning_rate": 1.2833275862068967e-06, "loss": 0.1968, "step": 225675 }, { "epoch": 2.22, "grad_norm": 15.496646881103516, "learning_rate": 1.282896551724138e-06, "loss": 0.1125, "step": 225700 }, { "epoch": 2.22, "grad_norm": 6.513902187347412, "learning_rate": 1.2824655172413794e-06, "loss": 0.1787, "step": 225725 }, { "epoch": 2.22, "grad_norm": 6.134385585784912, "learning_rate": 1.2820344827586208e-06, "loss": 0.0955, "step": 225750 }, { "epoch": 2.22, "grad_norm": 5.370728015899658, "learning_rate": 1.281603448275862e-06, "loss": 0.1611, "step": 225775 }, { "epoch": 2.22, "grad_norm": 11.071869850158691, "learning_rate": 1.2811724137931037e-06, "loss": 0.0844, "step": 225800 }, { "epoch": 2.22, "grad_norm": 1.7384947538375854, "learning_rate": 1.2807413793103448e-06, "loss": 0.1866, "step": 225825 }, { "epoch": 2.22, "grad_norm": 6.890034198760986, "learning_rate": 1.2803103448275864e-06, "loss": 0.0846, "step": 225850 }, { "epoch": 2.22, "grad_norm": 3.1012918949127197, "learning_rate": 1.2798793103448277e-06, "loss": 0.1609, "step": 225875 }, { "epoch": 2.22, "grad_norm": 15.788429260253906, "learning_rate": 1.2794482758620691e-06, "loss": 0.102, "step": 225900 }, { "epoch": 2.22, "grad_norm": 8.52459716796875, "learning_rate": 1.2790172413793104e-06, "loss": 0.1845, "step": 225925 }, { "epoch": 2.22, "grad_norm": 6.78429651260376, "learning_rate": 1.2785862068965518e-06, "loss": 0.0886, "step": 225950 }, { "epoch": 2.22, "grad_norm": 7.899984836578369, "learning_rate": 1.2781551724137933e-06, "loss": 0.1806, "step": 225975 }, { "epoch": 2.22, "grad_norm": 10.676167488098145, "learning_rate": 1.2777241379310345e-06, "loss": 0.0903, "step": 226000 }, { "epoch": 2.22, "grad_norm": 2.8234024047851562, "learning_rate": 1.277293103448276e-06, "loss": 0.1538, "step": 226025 }, { "epoch": 2.22, "grad_norm": 12.809505462646484, "learning_rate": 1.2768620689655172e-06, "loss": 0.1231, "step": 226050 }, { "epoch": 2.22, "grad_norm": 2.311309814453125, "learning_rate": 1.2764310344827587e-06, "loss": 0.2022, "step": 226075 }, { "epoch": 2.22, "grad_norm": 8.333486557006836, "learning_rate": 1.276e-06, "loss": 0.0992, "step": 226100 }, { "epoch": 2.22, "grad_norm": 2.3163976669311523, "learning_rate": 1.2755689655172416e-06, "loss": 0.1326, "step": 226125 }, { "epoch": 2.22, "grad_norm": 11.269145965576172, "learning_rate": 1.2751379310344829e-06, "loss": 0.0896, "step": 226150 }, { "epoch": 2.22, "grad_norm": 8.616243362426758, "learning_rate": 1.2747068965517243e-06, "loss": 0.169, "step": 226175 }, { "epoch": 2.22, "grad_norm": 13.371188163757324, "learning_rate": 1.2742758620689658e-06, "loss": 0.1237, "step": 226200 }, { "epoch": 2.22, "grad_norm": 5.033333778381348, "learning_rate": 1.273844827586207e-06, "loss": 0.1829, "step": 226225 }, { "epoch": 2.22, "grad_norm": 9.312294006347656, "learning_rate": 1.2734137931034485e-06, "loss": 0.0875, "step": 226250 }, { "epoch": 2.22, "grad_norm": 5.232822418212891, "learning_rate": 1.2729827586206897e-06, "loss": 0.1945, "step": 226275 }, { "epoch": 2.23, "grad_norm": 8.798897743225098, "learning_rate": 1.2725517241379312e-06, "loss": 0.1195, "step": 226300 }, { "epoch": 2.23, "grad_norm": 6.692351341247559, "learning_rate": 1.2721206896551724e-06, "loss": 0.1722, "step": 226325 }, { "epoch": 2.23, "grad_norm": 13.440330505371094, "learning_rate": 1.2716896551724139e-06, "loss": 0.1005, "step": 226350 }, { "epoch": 2.23, "grad_norm": 6.602782249450684, "learning_rate": 1.2712586206896551e-06, "loss": 0.1652, "step": 226375 }, { "epoch": 2.23, "grad_norm": 11.382805824279785, "learning_rate": 1.2708275862068966e-06, "loss": 0.0959, "step": 226400 }, { "epoch": 2.23, "grad_norm": 3.2634034156799316, "learning_rate": 1.2703965517241382e-06, "loss": 0.1668, "step": 226425 }, { "epoch": 2.23, "grad_norm": 8.81252384185791, "learning_rate": 1.2699655172413795e-06, "loss": 0.1039, "step": 226450 }, { "epoch": 2.23, "grad_norm": 3.3149378299713135, "learning_rate": 1.269534482758621e-06, "loss": 0.1922, "step": 226475 }, { "epoch": 2.23, "grad_norm": 10.780671119689941, "learning_rate": 1.2691034482758622e-06, "loss": 0.1229, "step": 226500 }, { "epoch": 2.23, "grad_norm": 3.331968307495117, "learning_rate": 1.2686724137931036e-06, "loss": 0.1667, "step": 226525 }, { "epoch": 2.23, "grad_norm": 9.374412536621094, "learning_rate": 1.2682413793103449e-06, "loss": 0.109, "step": 226550 }, { "epoch": 2.23, "grad_norm": 3.6027426719665527, "learning_rate": 1.2678103448275863e-06, "loss": 0.1641, "step": 226575 }, { "epoch": 2.23, "grad_norm": 13.448038101196289, "learning_rate": 1.2673793103448276e-06, "loss": 0.1093, "step": 226600 }, { "epoch": 2.23, "grad_norm": 1.8297815322875977, "learning_rate": 1.266948275862069e-06, "loss": 0.1736, "step": 226625 }, { "epoch": 2.23, "grad_norm": 12.138387680053711, "learning_rate": 1.2665172413793103e-06, "loss": 0.1005, "step": 226650 }, { "epoch": 2.23, "grad_norm": 6.059604167938232, "learning_rate": 1.2660862068965517e-06, "loss": 0.2028, "step": 226675 }, { "epoch": 2.23, "grad_norm": 12.615572929382324, "learning_rate": 1.2656551724137934e-06, "loss": 0.126, "step": 226700 }, { "epoch": 2.23, "grad_norm": 8.686163902282715, "learning_rate": 1.2652241379310344e-06, "loss": 0.1902, "step": 226725 }, { "epoch": 2.23, "grad_norm": 11.833559036254883, "learning_rate": 1.2647931034482761e-06, "loss": 0.0892, "step": 226750 }, { "epoch": 2.23, "grad_norm": 6.377890586853027, "learning_rate": 1.2643620689655174e-06, "loss": 0.1568, "step": 226775 }, { "epoch": 2.23, "grad_norm": 13.028762817382812, "learning_rate": 1.2639310344827588e-06, "loss": 0.1083, "step": 226800 }, { "epoch": 2.23, "grad_norm": 0.4878884553909302, "learning_rate": 1.2635e-06, "loss": 0.1827, "step": 226825 }, { "epoch": 2.23, "grad_norm": 15.142601013183594, "learning_rate": 1.2630689655172415e-06, "loss": 0.1003, "step": 226850 }, { "epoch": 2.23, "grad_norm": 2.9815785884857178, "learning_rate": 1.2626379310344828e-06, "loss": 0.1772, "step": 226875 }, { "epoch": 2.23, "grad_norm": 9.126315116882324, "learning_rate": 1.2622068965517242e-06, "loss": 0.1136, "step": 226900 }, { "epoch": 2.23, "grad_norm": 2.017030954360962, "learning_rate": 1.2617758620689657e-06, "loss": 0.1865, "step": 226925 }, { "epoch": 2.23, "grad_norm": 5.186183452606201, "learning_rate": 1.261344827586207e-06, "loss": 0.0956, "step": 226950 }, { "epoch": 2.23, "grad_norm": 4.372758388519287, "learning_rate": 1.2609137931034484e-06, "loss": 0.1584, "step": 226975 }, { "epoch": 2.23, "grad_norm": 11.074505805969238, "learning_rate": 1.2604827586206896e-06, "loss": 0.08, "step": 227000 }, { "epoch": 2.23, "grad_norm": 1.0719410181045532, "learning_rate": 1.2600517241379313e-06, "loss": 0.1775, "step": 227025 }, { "epoch": 2.23, "grad_norm": 9.743861198425293, "learning_rate": 1.2596206896551723e-06, "loss": 0.086, "step": 227050 }, { "epoch": 2.23, "grad_norm": 5.120839595794678, "learning_rate": 1.259189655172414e-06, "loss": 0.1509, "step": 227075 }, { "epoch": 2.23, "grad_norm": 15.385075569152832, "learning_rate": 1.2587586206896552e-06, "loss": 0.0795, "step": 227100 }, { "epoch": 2.23, "grad_norm": 2.930298328399658, "learning_rate": 1.2583448275862071e-06, "loss": 0.1684, "step": 227125 }, { "epoch": 2.23, "grad_norm": 10.342663764953613, "learning_rate": 1.2579137931034481e-06, "loss": 0.096, "step": 227150 }, { "epoch": 2.23, "grad_norm": 5.637037754058838, "learning_rate": 1.2574827586206898e-06, "loss": 0.1984, "step": 227175 }, { "epoch": 2.23, "grad_norm": 7.999183177947998, "learning_rate": 1.2570517241379313e-06, "loss": 0.1052, "step": 227200 }, { "epoch": 2.23, "grad_norm": 7.359177589416504, "learning_rate": 1.2566206896551725e-06, "loss": 0.1983, "step": 227225 }, { "epoch": 2.23, "grad_norm": 7.867815017700195, "learning_rate": 1.256189655172414e-06, "loss": 0.0775, "step": 227250 }, { "epoch": 2.23, "grad_norm": 4.685422420501709, "learning_rate": 1.2557586206896552e-06, "loss": 0.1749, "step": 227275 }, { "epoch": 2.23, "grad_norm": 13.698882102966309, "learning_rate": 1.2553275862068967e-06, "loss": 0.0963, "step": 227300 }, { "epoch": 2.24, "grad_norm": 4.221513748168945, "learning_rate": 1.254896551724138e-06, "loss": 0.1859, "step": 227325 }, { "epoch": 2.24, "grad_norm": 7.654146194458008, "learning_rate": 1.2544655172413794e-06, "loss": 0.1028, "step": 227350 }, { "epoch": 2.24, "grad_norm": 4.347540378570557, "learning_rate": 1.2540344827586206e-06, "loss": 0.1542, "step": 227375 }, { "epoch": 2.24, "grad_norm": 8.656203269958496, "learning_rate": 1.253603448275862e-06, "loss": 0.1031, "step": 227400 }, { "epoch": 2.24, "grad_norm": 5.271262168884277, "learning_rate": 1.2531724137931037e-06, "loss": 0.1759, "step": 227425 }, { "epoch": 2.24, "grad_norm": 12.595511436462402, "learning_rate": 1.252741379310345e-06, "loss": 0.1058, "step": 227450 }, { "epoch": 2.24, "grad_norm": 6.155264854431152, "learning_rate": 1.2523103448275864e-06, "loss": 0.1852, "step": 227475 }, { "epoch": 2.24, "grad_norm": 16.312992095947266, "learning_rate": 1.2518793103448277e-06, "loss": 0.1055, "step": 227500 }, { "epoch": 2.24, "grad_norm": 4.304591655731201, "learning_rate": 1.2514482758620691e-06, "loss": 0.1394, "step": 227525 }, { "epoch": 2.24, "grad_norm": 12.871062278747559, "learning_rate": 1.2510172413793104e-06, "loss": 0.0954, "step": 227550 }, { "epoch": 2.24, "grad_norm": 2.182036876678467, "learning_rate": 1.2505862068965518e-06, "loss": 0.1455, "step": 227575 }, { "epoch": 2.24, "grad_norm": 19.226377487182617, "learning_rate": 1.250155172413793e-06, "loss": 0.1141, "step": 227600 }, { "epoch": 2.24, "grad_norm": 7.506092548370361, "learning_rate": 1.2497241379310345e-06, "loss": 0.1874, "step": 227625 }, { "epoch": 2.24, "grad_norm": 6.98958158493042, "learning_rate": 1.249293103448276e-06, "loss": 0.0898, "step": 227650 }, { "epoch": 2.24, "grad_norm": 1.8385511636734009, "learning_rate": 1.2488620689655173e-06, "loss": 0.1649, "step": 227675 }, { "epoch": 2.24, "grad_norm": 16.530248641967773, "learning_rate": 1.2484310344827587e-06, "loss": 0.1105, "step": 227700 }, { "epoch": 2.24, "grad_norm": 3.858656644821167, "learning_rate": 1.248e-06, "loss": 0.1886, "step": 227725 }, { "epoch": 2.24, "grad_norm": 10.811960220336914, "learning_rate": 1.2475689655172416e-06, "loss": 0.0905, "step": 227750 }, { "epoch": 2.24, "grad_norm": 4.799722671508789, "learning_rate": 1.2471379310344829e-06, "loss": 0.1773, "step": 227775 }, { "epoch": 2.24, "grad_norm": 9.425414085388184, "learning_rate": 1.2467068965517243e-06, "loss": 0.1201, "step": 227800 }, { "epoch": 2.24, "grad_norm": 6.412459850311279, "learning_rate": 1.2462758620689656e-06, "loss": 0.206, "step": 227825 }, { "epoch": 2.24, "grad_norm": 7.935391426086426, "learning_rate": 1.245844827586207e-06, "loss": 0.1034, "step": 227850 }, { "epoch": 2.24, "grad_norm": 10.780295372009277, "learning_rate": 1.2454137931034483e-06, "loss": 0.1326, "step": 227875 }, { "epoch": 2.24, "grad_norm": 8.770188331604004, "learning_rate": 1.2449827586206897e-06, "loss": 0.1011, "step": 227900 }, { "epoch": 2.24, "grad_norm": 1.9164282083511353, "learning_rate": 1.2445517241379312e-06, "loss": 0.19, "step": 227925 }, { "epoch": 2.24, "grad_norm": 8.760740280151367, "learning_rate": 1.2441206896551724e-06, "loss": 0.0846, "step": 227950 }, { "epoch": 2.24, "grad_norm": 2.573153495788574, "learning_rate": 1.2436896551724139e-06, "loss": 0.1505, "step": 227975 }, { "epoch": 2.24, "grad_norm": 15.13142204284668, "learning_rate": 1.2432586206896553e-06, "loss": 0.1013, "step": 228000 }, { "epoch": 2.24, "grad_norm": 4.127839088439941, "learning_rate": 1.2428275862068968e-06, "loss": 0.1775, "step": 228025 }, { "epoch": 2.24, "grad_norm": 14.354918479919434, "learning_rate": 1.242396551724138e-06, "loss": 0.1018, "step": 228050 }, { "epoch": 2.24, "grad_norm": 3.2141973972320557, "learning_rate": 1.2419655172413795e-06, "loss": 0.1852, "step": 228075 }, { "epoch": 2.24, "grad_norm": 33.397640228271484, "learning_rate": 1.2415344827586207e-06, "loss": 0.1113, "step": 228100 }, { "epoch": 2.24, "grad_norm": 4.663841247558594, "learning_rate": 1.2411034482758622e-06, "loss": 0.2012, "step": 228125 }, { "epoch": 2.24, "grad_norm": 10.754850387573242, "learning_rate": 1.2406724137931034e-06, "loss": 0.0873, "step": 228150 }, { "epoch": 2.24, "grad_norm": 4.280104160308838, "learning_rate": 1.240241379310345e-06, "loss": 0.1769, "step": 228175 }, { "epoch": 2.24, "grad_norm": 11.01152229309082, "learning_rate": 1.2398103448275864e-06, "loss": 0.0978, "step": 228200 }, { "epoch": 2.24, "grad_norm": 1.2139599323272705, "learning_rate": 1.2393793103448278e-06, "loss": 0.2153, "step": 228225 }, { "epoch": 2.24, "grad_norm": 9.52919864654541, "learning_rate": 1.238948275862069e-06, "loss": 0.0909, "step": 228250 }, { "epoch": 2.24, "grad_norm": 4.495757102966309, "learning_rate": 1.2385172413793105e-06, "loss": 0.1458, "step": 228275 }, { "epoch": 2.24, "grad_norm": 12.739578247070312, "learning_rate": 1.2380862068965518e-06, "loss": 0.0972, "step": 228300 }, { "epoch": 2.24, "grad_norm": 7.341182231903076, "learning_rate": 1.2376551724137932e-06, "loss": 0.1589, "step": 228325 }, { "epoch": 2.25, "grad_norm": 6.958410739898682, "learning_rate": 1.2372241379310347e-06, "loss": 0.0913, "step": 228350 }, { "epoch": 2.25, "grad_norm": 1.9237422943115234, "learning_rate": 1.236793103448276e-06, "loss": 0.1651, "step": 228375 }, { "epoch": 2.25, "grad_norm": 10.701895713806152, "learning_rate": 1.2363620689655174e-06, "loss": 0.0576, "step": 228400 }, { "epoch": 2.25, "grad_norm": 4.19976282119751, "learning_rate": 1.2359310344827586e-06, "loss": 0.1785, "step": 228425 }, { "epoch": 2.25, "grad_norm": 13.30831241607666, "learning_rate": 1.2355e-06, "loss": 0.1052, "step": 228450 }, { "epoch": 2.25, "grad_norm": 6.630136013031006, "learning_rate": 1.2350689655172415e-06, "loss": 0.1898, "step": 228475 }, { "epoch": 2.25, "grad_norm": 13.417545318603516, "learning_rate": 1.234637931034483e-06, "loss": 0.0887, "step": 228500 }, { "epoch": 2.25, "grad_norm": 2.6098434925079346, "learning_rate": 1.2342068965517242e-06, "loss": 0.2317, "step": 228525 }, { "epoch": 2.25, "grad_norm": 6.7943644523620605, "learning_rate": 1.2337758620689657e-06, "loss": 0.0813, "step": 228550 }, { "epoch": 2.25, "grad_norm": 3.8297183513641357, "learning_rate": 1.233344827586207e-06, "loss": 0.1506, "step": 228575 }, { "epoch": 2.25, "grad_norm": 11.525127410888672, "learning_rate": 1.2329137931034484e-06, "loss": 0.1069, "step": 228600 }, { "epoch": 2.25, "grad_norm": 6.222299098968506, "learning_rate": 1.2324827586206896e-06, "loss": 0.1987, "step": 228625 }, { "epoch": 2.25, "grad_norm": 6.61613655090332, "learning_rate": 1.232051724137931e-06, "loss": 0.0955, "step": 228650 }, { "epoch": 2.25, "grad_norm": 4.349690914154053, "learning_rate": 1.2316206896551725e-06, "loss": 0.1963, "step": 228675 }, { "epoch": 2.25, "grad_norm": 12.503878593444824, "learning_rate": 1.231189655172414e-06, "loss": 0.1117, "step": 228700 }, { "epoch": 2.25, "grad_norm": 5.564282417297363, "learning_rate": 1.2307586206896552e-06, "loss": 0.1777, "step": 228725 }, { "epoch": 2.25, "grad_norm": 12.682760238647461, "learning_rate": 1.2303275862068967e-06, "loss": 0.0967, "step": 228750 }, { "epoch": 2.25, "grad_norm": 3.8142735958099365, "learning_rate": 1.229896551724138e-06, "loss": 0.145, "step": 228775 }, { "epoch": 2.25, "grad_norm": 11.771915435791016, "learning_rate": 1.2294655172413794e-06, "loss": 0.0745, "step": 228800 }, { "epoch": 2.25, "grad_norm": 0.5140408873558044, "learning_rate": 1.2290344827586209e-06, "loss": 0.1749, "step": 228825 }, { "epoch": 2.25, "grad_norm": 12.79300308227539, "learning_rate": 1.228603448275862e-06, "loss": 0.1052, "step": 228850 }, { "epoch": 2.25, "grad_norm": 4.7570600509643555, "learning_rate": 1.2281724137931036e-06, "loss": 0.1824, "step": 228875 }, { "epoch": 2.25, "grad_norm": 10.512479782104492, "learning_rate": 1.2277413793103448e-06, "loss": 0.0735, "step": 228900 }, { "epoch": 2.25, "grad_norm": 4.1431145668029785, "learning_rate": 1.2273103448275865e-06, "loss": 0.1648, "step": 228925 }, { "epoch": 2.25, "grad_norm": 7.2672953605651855, "learning_rate": 1.2268793103448277e-06, "loss": 0.08, "step": 228950 }, { "epoch": 2.25, "grad_norm": 1.081886887550354, "learning_rate": 1.2264482758620692e-06, "loss": 0.2, "step": 228975 }, { "epoch": 2.25, "grad_norm": 8.02213191986084, "learning_rate": 1.2260172413793104e-06, "loss": 0.1101, "step": 229000 }, { "epoch": 2.25, "grad_norm": 6.186495780944824, "learning_rate": 1.2255862068965519e-06, "loss": 0.1837, "step": 229025 }, { "epoch": 2.25, "grad_norm": 14.316014289855957, "learning_rate": 1.2251551724137931e-06, "loss": 0.0998, "step": 229050 }, { "epoch": 2.25, "grad_norm": 5.852708339691162, "learning_rate": 1.2247241379310346e-06, "loss": 0.2114, "step": 229075 }, { "epoch": 2.25, "grad_norm": 7.582895755767822, "learning_rate": 1.2242931034482758e-06, "loss": 0.1398, "step": 229100 }, { "epoch": 2.25, "grad_norm": 5.793493747711182, "learning_rate": 1.2238620689655173e-06, "loss": 0.1889, "step": 229125 }, { "epoch": 2.25, "grad_norm": 4.483314514160156, "learning_rate": 1.2234310344827587e-06, "loss": 0.1152, "step": 229150 }, { "epoch": 2.25, "grad_norm": 1.5771342515945435, "learning_rate": 1.2230000000000002e-06, "loss": 0.1863, "step": 229175 }, { "epoch": 2.25, "grad_norm": 15.49301815032959, "learning_rate": 1.2225689655172414e-06, "loss": 0.0778, "step": 229200 }, { "epoch": 2.25, "grad_norm": 7.216879367828369, "learning_rate": 1.2221379310344829e-06, "loss": 0.1554, "step": 229225 }, { "epoch": 2.25, "grad_norm": 10.612343788146973, "learning_rate": 1.2217068965517243e-06, "loss": 0.1194, "step": 229250 }, { "epoch": 2.25, "grad_norm": 6.312224864959717, "learning_rate": 1.2212758620689656e-06, "loss": 0.1776, "step": 229275 }, { "epoch": 2.25, "grad_norm": 7.668562889099121, "learning_rate": 1.220844827586207e-06, "loss": 0.1092, "step": 229300 }, { "epoch": 2.25, "grad_norm": 4.674604892730713, "learning_rate": 1.2204137931034483e-06, "loss": 0.1939, "step": 229325 }, { "epoch": 2.26, "grad_norm": 8.481077194213867, "learning_rate": 1.2199827586206898e-06, "loss": 0.1254, "step": 229350 }, { "epoch": 2.26, "grad_norm": 4.685754776000977, "learning_rate": 1.219551724137931e-06, "loss": 0.1641, "step": 229375 }, { "epoch": 2.26, "grad_norm": 9.009527206420898, "learning_rate": 1.2191206896551727e-06, "loss": 0.0912, "step": 229400 }, { "epoch": 2.26, "grad_norm": 3.728020429611206, "learning_rate": 1.218689655172414e-06, "loss": 0.1989, "step": 229425 }, { "epoch": 2.26, "grad_norm": 14.019264221191406, "learning_rate": 1.2182586206896554e-06, "loss": 0.0976, "step": 229450 }, { "epoch": 2.26, "grad_norm": 4.427631855010986, "learning_rate": 1.2178275862068966e-06, "loss": 0.1841, "step": 229475 }, { "epoch": 2.26, "grad_norm": 11.127077102661133, "learning_rate": 1.217396551724138e-06, "loss": 0.0669, "step": 229500 }, { "epoch": 2.26, "grad_norm": 11.744400978088379, "learning_rate": 1.2169655172413793e-06, "loss": 0.188, "step": 229525 }, { "epoch": 2.26, "grad_norm": 8.510645866394043, "learning_rate": 1.2165344827586208e-06, "loss": 0.0881, "step": 229550 }, { "epoch": 2.26, "grad_norm": 3.9388651847839355, "learning_rate": 1.2161206896551724e-06, "loss": 0.1637, "step": 229575 }, { "epoch": 2.26, "grad_norm": 7.659750461578369, "learning_rate": 1.215689655172414e-06, "loss": 0.0881, "step": 229600 }, { "epoch": 2.26, "grad_norm": 4.275875091552734, "learning_rate": 1.2152586206896551e-06, "loss": 0.1849, "step": 229625 }, { "epoch": 2.26, "grad_norm": 13.273831367492676, "learning_rate": 1.2148275862068966e-06, "loss": 0.0936, "step": 229650 }, { "epoch": 2.26, "grad_norm": 3.3013086318969727, "learning_rate": 1.214396551724138e-06, "loss": 0.1823, "step": 229675 }, { "epoch": 2.26, "grad_norm": 8.738727569580078, "learning_rate": 1.2139655172413795e-06, "loss": 0.0973, "step": 229700 }, { "epoch": 2.26, "grad_norm": 4.988706111907959, "learning_rate": 1.2135344827586208e-06, "loss": 0.1636, "step": 229725 }, { "epoch": 2.26, "grad_norm": 5.8334574699401855, "learning_rate": 1.2131034482758622e-06, "loss": 0.1238, "step": 229750 }, { "epoch": 2.26, "grad_norm": 0.9181334376335144, "learning_rate": 1.2126724137931035e-06, "loss": 0.1983, "step": 229775 }, { "epoch": 2.26, "grad_norm": 13.104950904846191, "learning_rate": 1.212241379310345e-06, "loss": 0.105, "step": 229800 }, { "epoch": 2.26, "grad_norm": 0.3434581756591797, "learning_rate": 1.2118103448275864e-06, "loss": 0.152, "step": 229825 }, { "epoch": 2.26, "grad_norm": 6.358633995056152, "learning_rate": 1.2113793103448276e-06, "loss": 0.0861, "step": 229850 }, { "epoch": 2.26, "grad_norm": 3.4959371089935303, "learning_rate": 1.210948275862069e-06, "loss": 0.1765, "step": 229875 }, { "epoch": 2.26, "grad_norm": 2.4917593002319336, "learning_rate": 1.2105172413793105e-06, "loss": 0.0962, "step": 229900 }, { "epoch": 2.26, "grad_norm": 1.8898212909698486, "learning_rate": 1.2100862068965518e-06, "loss": 0.1848, "step": 229925 }, { "epoch": 2.26, "grad_norm": 8.575486183166504, "learning_rate": 1.2096551724137932e-06, "loss": 0.0867, "step": 229950 }, { "epoch": 2.26, "grad_norm": 4.635428428649902, "learning_rate": 1.2092241379310347e-06, "loss": 0.176, "step": 229975 }, { "epoch": 2.26, "grad_norm": 7.955153465270996, "learning_rate": 1.208793103448276e-06, "loss": 0.1144, "step": 230000 }, { "epoch": 2.26, "grad_norm": 0.23365049064159393, "learning_rate": 1.2083620689655174e-06, "loss": 0.2177, "step": 230025 }, { "epoch": 2.26, "grad_norm": 15.95496940612793, "learning_rate": 1.2079310344827586e-06, "loss": 0.1032, "step": 230050 }, { "epoch": 2.26, "grad_norm": 6.416160583496094, "learning_rate": 1.2075e-06, "loss": 0.1728, "step": 230075 }, { "epoch": 2.26, "grad_norm": 9.95510482788086, "learning_rate": 1.2070689655172413e-06, "loss": 0.1159, "step": 230100 }, { "epoch": 2.26, "grad_norm": 6.144651889801025, "learning_rate": 1.2066379310344828e-06, "loss": 0.2303, "step": 230125 }, { "epoch": 2.26, "grad_norm": 5.673972129821777, "learning_rate": 1.2062068965517242e-06, "loss": 0.0951, "step": 230150 }, { "epoch": 2.26, "grad_norm": 4.952970504760742, "learning_rate": 1.2057758620689657e-06, "loss": 0.2043, "step": 230175 }, { "epoch": 2.26, "grad_norm": 8.867018699645996, "learning_rate": 1.205344827586207e-06, "loss": 0.0963, "step": 230200 }, { "epoch": 2.26, "grad_norm": 0.49752143025398254, "learning_rate": 1.2049137931034484e-06, "loss": 0.1777, "step": 230225 }, { "epoch": 2.26, "grad_norm": 13.101277351379395, "learning_rate": 1.2044827586206899e-06, "loss": 0.0937, "step": 230250 }, { "epoch": 2.26, "grad_norm": 8.70748233795166, "learning_rate": 1.204051724137931e-06, "loss": 0.1893, "step": 230275 }, { "epoch": 2.26, "grad_norm": 10.382033348083496, "learning_rate": 1.2036206896551726e-06, "loss": 0.0735, "step": 230300 }, { "epoch": 2.26, "grad_norm": 4.384866237640381, "learning_rate": 1.2031896551724138e-06, "loss": 0.1943, "step": 230325 }, { "epoch": 2.26, "grad_norm": 12.967944145202637, "learning_rate": 1.2027586206896553e-06, "loss": 0.1016, "step": 230350 }, { "epoch": 2.27, "grad_norm": 4.426616668701172, "learning_rate": 1.2023275862068967e-06, "loss": 0.1979, "step": 230375 }, { "epoch": 2.27, "grad_norm": 11.413748741149902, "learning_rate": 1.2018965517241382e-06, "loss": 0.096, "step": 230400 }, { "epoch": 2.27, "grad_norm": 4.675804615020752, "learning_rate": 1.2014655172413794e-06, "loss": 0.2248, "step": 230425 }, { "epoch": 2.27, "grad_norm": 9.787238121032715, "learning_rate": 1.2010344827586209e-06, "loss": 0.104, "step": 230450 }, { "epoch": 2.27, "grad_norm": 2.6917598247528076, "learning_rate": 1.2006034482758621e-06, "loss": 0.1661, "step": 230475 }, { "epoch": 2.27, "grad_norm": 8.563612937927246, "learning_rate": 1.2001724137931036e-06, "loss": 0.114, "step": 230500 }, { "epoch": 2.27, "grad_norm": 7.004003047943115, "learning_rate": 1.1997413793103448e-06, "loss": 0.1362, "step": 230525 }, { "epoch": 2.27, "grad_norm": 12.874324798583984, "learning_rate": 1.1993103448275863e-06, "loss": 0.1021, "step": 230550 }, { "epoch": 2.27, "grad_norm": 2.7889468669891357, "learning_rate": 1.1988793103448277e-06, "loss": 0.1721, "step": 230575 }, { "epoch": 2.27, "grad_norm": 7.393643379211426, "learning_rate": 1.1984482758620692e-06, "loss": 0.1002, "step": 230600 }, { "epoch": 2.27, "grad_norm": 3.676154851913452, "learning_rate": 1.1980172413793104e-06, "loss": 0.1687, "step": 230625 }, { "epoch": 2.27, "grad_norm": 8.813239097595215, "learning_rate": 1.1975862068965519e-06, "loss": 0.1141, "step": 230650 }, { "epoch": 2.27, "grad_norm": 2.4393656253814697, "learning_rate": 1.1971551724137931e-06, "loss": 0.1754, "step": 230675 }, { "epoch": 2.27, "grad_norm": 9.93536376953125, "learning_rate": 1.1967241379310346e-06, "loss": 0.0901, "step": 230700 }, { "epoch": 2.27, "grad_norm": 5.457149505615234, "learning_rate": 1.196293103448276e-06, "loss": 0.1647, "step": 230725 }, { "epoch": 2.27, "grad_norm": 8.081620216369629, "learning_rate": 1.1958620689655173e-06, "loss": 0.0827, "step": 230750 }, { "epoch": 2.27, "grad_norm": 2.791853427886963, "learning_rate": 1.1954310344827587e-06, "loss": 0.1675, "step": 230775 }, { "epoch": 2.27, "grad_norm": 7.095437049865723, "learning_rate": 1.195e-06, "loss": 0.0897, "step": 230800 }, { "epoch": 2.27, "grad_norm": 8.94880199432373, "learning_rate": 1.1945689655172415e-06, "loss": 0.2162, "step": 230825 }, { "epoch": 2.27, "grad_norm": 4.314781188964844, "learning_rate": 1.194137931034483e-06, "loss": 0.0856, "step": 230850 }, { "epoch": 2.27, "grad_norm": 0.17459416389465332, "learning_rate": 1.1937068965517244e-06, "loss": 0.2051, "step": 230875 }, { "epoch": 2.27, "grad_norm": 12.953760147094727, "learning_rate": 1.1932758620689656e-06, "loss": 0.1332, "step": 230900 }, { "epoch": 2.27, "grad_norm": 5.647008419036865, "learning_rate": 1.192844827586207e-06, "loss": 0.158, "step": 230925 }, { "epoch": 2.27, "grad_norm": 10.08419132232666, "learning_rate": 1.1924137931034483e-06, "loss": 0.1282, "step": 230950 }, { "epoch": 2.27, "grad_norm": 0.047988519072532654, "learning_rate": 1.1919827586206898e-06, "loss": 0.1862, "step": 230975 }, { "epoch": 2.27, "grad_norm": 2.3061602115631104, "learning_rate": 1.191551724137931e-06, "loss": 0.0759, "step": 231000 }, { "epoch": 2.27, "grad_norm": 2.3006668090820312, "learning_rate": 1.1911206896551725e-06, "loss": 0.1933, "step": 231025 }, { "epoch": 2.27, "grad_norm": 9.672102928161621, "learning_rate": 1.190689655172414e-06, "loss": 0.0831, "step": 231050 }, { "epoch": 2.27, "grad_norm": 5.706908226013184, "learning_rate": 1.1902586206896554e-06, "loss": 0.1845, "step": 231075 }, { "epoch": 2.27, "grad_norm": 10.500297546386719, "learning_rate": 1.1898275862068966e-06, "loss": 0.0779, "step": 231100 }, { "epoch": 2.27, "grad_norm": 0.7137186527252197, "learning_rate": 1.189396551724138e-06, "loss": 0.2071, "step": 231125 }, { "epoch": 2.27, "grad_norm": 10.774118423461914, "learning_rate": 1.1889655172413793e-06, "loss": 0.0833, "step": 231150 }, { "epoch": 2.27, "grad_norm": 0.3502133786678314, "learning_rate": 1.1885344827586208e-06, "loss": 0.1825, "step": 231175 }, { "epoch": 2.27, "grad_norm": 7.205417156219482, "learning_rate": 1.1881034482758622e-06, "loss": 0.0834, "step": 231200 }, { "epoch": 2.27, "grad_norm": 6.723935604095459, "learning_rate": 1.1876724137931035e-06, "loss": 0.2245, "step": 231225 }, { "epoch": 2.27, "grad_norm": 6.1424760818481445, "learning_rate": 1.187241379310345e-06, "loss": 0.1175, "step": 231250 }, { "epoch": 2.27, "grad_norm": 5.593952655792236, "learning_rate": 1.1868103448275862e-06, "loss": 0.1743, "step": 231275 }, { "epoch": 2.27, "grad_norm": 14.19692325592041, "learning_rate": 1.1863793103448276e-06, "loss": 0.116, "step": 231300 }, { "epoch": 2.27, "grad_norm": 3.104688882827759, "learning_rate": 1.185948275862069e-06, "loss": 0.2033, "step": 231325 }, { "epoch": 2.27, "grad_norm": 6.465788841247559, "learning_rate": 1.1855172413793106e-06, "loss": 0.0969, "step": 231350 }, { "epoch": 2.27, "grad_norm": 3.2872002124786377, "learning_rate": 1.1850862068965518e-06, "loss": 0.1576, "step": 231375 }, { "epoch": 2.28, "grad_norm": 9.394559860229492, "learning_rate": 1.1846551724137933e-06, "loss": 0.1177, "step": 231400 }, { "epoch": 2.28, "grad_norm": 8.355395317077637, "learning_rate": 1.1842241379310345e-06, "loss": 0.1954, "step": 231425 }, { "epoch": 2.28, "grad_norm": 12.706974983215332, "learning_rate": 1.183793103448276e-06, "loss": 0.1191, "step": 231450 }, { "epoch": 2.28, "grad_norm": 4.572780609130859, "learning_rate": 1.1833620689655174e-06, "loss": 0.1953, "step": 231475 }, { "epoch": 2.28, "grad_norm": 3.359339714050293, "learning_rate": 1.1829310344827587e-06, "loss": 0.0994, "step": 231500 }, { "epoch": 2.28, "grad_norm": 2.6909987926483154, "learning_rate": 1.1825000000000001e-06, "loss": 0.155, "step": 231525 }, { "epoch": 2.28, "grad_norm": 16.178464889526367, "learning_rate": 1.1820689655172416e-06, "loss": 0.0843, "step": 231550 }, { "epoch": 2.28, "grad_norm": 7.41641092300415, "learning_rate": 1.1816379310344828e-06, "loss": 0.171, "step": 231575 }, { "epoch": 2.28, "grad_norm": 9.552652359008789, "learning_rate": 1.1812068965517243e-06, "loss": 0.0673, "step": 231600 }, { "epoch": 2.28, "grad_norm": 4.257801532745361, "learning_rate": 1.180793103448276e-06, "loss": 0.1445, "step": 231625 }, { "epoch": 2.28, "grad_norm": 11.122405052185059, "learning_rate": 1.1803620689655174e-06, "loss": 0.0851, "step": 231650 }, { "epoch": 2.28, "grad_norm": 4.609269142150879, "learning_rate": 1.1799310344827586e-06, "loss": 0.1687, "step": 231675 }, { "epoch": 2.28, "grad_norm": 8.39367961883545, "learning_rate": 1.1795e-06, "loss": 0.0754, "step": 231700 }, { "epoch": 2.28, "grad_norm": 3.6999504566192627, "learning_rate": 1.1790689655172416e-06, "loss": 0.174, "step": 231725 }, { "epoch": 2.28, "grad_norm": 16.912818908691406, "learning_rate": 1.1786379310344828e-06, "loss": 0.1066, "step": 231750 }, { "epoch": 2.28, "grad_norm": 3.801161527633667, "learning_rate": 1.1782068965517243e-06, "loss": 0.1721, "step": 231775 }, { "epoch": 2.28, "grad_norm": 13.932341575622559, "learning_rate": 1.1777758620689655e-06, "loss": 0.0847, "step": 231800 }, { "epoch": 2.28, "grad_norm": 0.8668574690818787, "learning_rate": 1.177344827586207e-06, "loss": 0.203, "step": 231825 }, { "epoch": 2.28, "grad_norm": 8.78051471710205, "learning_rate": 1.1769137931034484e-06, "loss": 0.1159, "step": 231850 }, { "epoch": 2.28, "grad_norm": 3.8862457275390625, "learning_rate": 1.1764827586206899e-06, "loss": 0.153, "step": 231875 }, { "epoch": 2.28, "grad_norm": 5.872366428375244, "learning_rate": 1.1760517241379311e-06, "loss": 0.0982, "step": 231900 }, { "epoch": 2.28, "grad_norm": 0.08192061632871628, "learning_rate": 1.1756206896551726e-06, "loss": 0.211, "step": 231925 }, { "epoch": 2.28, "grad_norm": 7.918067455291748, "learning_rate": 1.1751896551724138e-06, "loss": 0.1266, "step": 231950 }, { "epoch": 2.28, "grad_norm": 3.00712251663208, "learning_rate": 1.1747586206896553e-06, "loss": 0.1917, "step": 231975 }, { "epoch": 2.28, "grad_norm": 8.334155082702637, "learning_rate": 1.1743275862068965e-06, "loss": 0.105, "step": 232000 }, { "epoch": 2.28, "grad_norm": 1.6350419521331787, "learning_rate": 1.173896551724138e-06, "loss": 0.1619, "step": 232025 }, { "epoch": 2.28, "grad_norm": 8.312986373901367, "learning_rate": 1.1734655172413794e-06, "loss": 0.0692, "step": 232050 }, { "epoch": 2.28, "grad_norm": 4.871220111846924, "learning_rate": 1.1730344827586209e-06, "loss": 0.1643, "step": 232075 }, { "epoch": 2.28, "grad_norm": 12.773228645324707, "learning_rate": 1.1726034482758621e-06, "loss": 0.0893, "step": 232100 }, { "epoch": 2.28, "grad_norm": 3.0800704956054688, "learning_rate": 1.1721724137931036e-06, "loss": 0.1735, "step": 232125 }, { "epoch": 2.28, "grad_norm": 7.417189121246338, "learning_rate": 1.1717413793103448e-06, "loss": 0.1175, "step": 232150 }, { "epoch": 2.28, "grad_norm": 4.9785308837890625, "learning_rate": 1.1713103448275863e-06, "loss": 0.1834, "step": 232175 }, { "epoch": 2.28, "grad_norm": 7.149534225463867, "learning_rate": 1.1708793103448277e-06, "loss": 0.0754, "step": 232200 }, { "epoch": 2.28, "grad_norm": 1.650871992111206, "learning_rate": 1.170448275862069e-06, "loss": 0.1521, "step": 232225 }, { "epoch": 2.28, "grad_norm": 6.935515403747559, "learning_rate": 1.1700172413793104e-06, "loss": 0.0804, "step": 232250 }, { "epoch": 2.28, "grad_norm": 3.9159610271453857, "learning_rate": 1.1695862068965517e-06, "loss": 0.183, "step": 232275 }, { "epoch": 2.28, "grad_norm": 6.280837059020996, "learning_rate": 1.1691551724137931e-06, "loss": 0.0785, "step": 232300 }, { "epoch": 2.28, "grad_norm": 3.057488203048706, "learning_rate": 1.1687241379310346e-06, "loss": 0.1303, "step": 232325 }, { "epoch": 2.28, "grad_norm": 17.409406661987305, "learning_rate": 1.168293103448276e-06, "loss": 0.1182, "step": 232350 }, { "epoch": 2.28, "grad_norm": 9.689237594604492, "learning_rate": 1.1678620689655173e-06, "loss": 0.1841, "step": 232375 }, { "epoch": 2.28, "grad_norm": 4.313689231872559, "learning_rate": 1.1674310344827588e-06, "loss": 0.0906, "step": 232400 }, { "epoch": 2.29, "grad_norm": 2.3494033813476562, "learning_rate": 1.167e-06, "loss": 0.1792, "step": 232425 }, { "epoch": 2.29, "grad_norm": 11.80101203918457, "learning_rate": 1.1665689655172415e-06, "loss": 0.1287, "step": 232450 }, { "epoch": 2.29, "grad_norm": 4.7760796546936035, "learning_rate": 1.1661379310344827e-06, "loss": 0.1742, "step": 232475 }, { "epoch": 2.29, "grad_norm": 9.742036819458008, "learning_rate": 1.1657068965517242e-06, "loss": 0.1201, "step": 232500 }, { "epoch": 2.29, "grad_norm": 0.9398053288459778, "learning_rate": 1.1652758620689656e-06, "loss": 0.1879, "step": 232525 }, { "epoch": 2.29, "grad_norm": 9.000467300415039, "learning_rate": 1.164844827586207e-06, "loss": 0.0945, "step": 232550 }, { "epoch": 2.29, "grad_norm": 4.434271812438965, "learning_rate": 1.1644137931034483e-06, "loss": 0.1839, "step": 232575 }, { "epoch": 2.29, "grad_norm": 12.151409149169922, "learning_rate": 1.1639827586206898e-06, "loss": 0.1115, "step": 232600 }, { "epoch": 2.29, "grad_norm": 7.451460838317871, "learning_rate": 1.1635517241379312e-06, "loss": 0.1687, "step": 232625 }, { "epoch": 2.29, "grad_norm": 11.775946617126465, "learning_rate": 1.1631206896551725e-06, "loss": 0.0783, "step": 232650 }, { "epoch": 2.29, "grad_norm": 6.227093696594238, "learning_rate": 1.162689655172414e-06, "loss": 0.1559, "step": 232675 }, { "epoch": 2.29, "grad_norm": 9.744943618774414, "learning_rate": 1.1622586206896552e-06, "loss": 0.124, "step": 232700 }, { "epoch": 2.29, "grad_norm": 3.340893030166626, "learning_rate": 1.1618275862068966e-06, "loss": 0.1888, "step": 232725 }, { "epoch": 2.29, "grad_norm": 13.68252944946289, "learning_rate": 1.161396551724138e-06, "loss": 0.1136, "step": 232750 }, { "epoch": 2.29, "grad_norm": 4.514462471008301, "learning_rate": 1.1609655172413796e-06, "loss": 0.2235, "step": 232775 }, { "epoch": 2.29, "grad_norm": 7.880566120147705, "learning_rate": 1.1605344827586208e-06, "loss": 0.0831, "step": 232800 }, { "epoch": 2.29, "grad_norm": 1.9251201152801514, "learning_rate": 1.1601034482758623e-06, "loss": 0.1738, "step": 232825 }, { "epoch": 2.29, "grad_norm": 15.216948509216309, "learning_rate": 1.1596724137931035e-06, "loss": 0.0994, "step": 232850 }, { "epoch": 2.29, "grad_norm": 4.373601913452148, "learning_rate": 1.159241379310345e-06, "loss": 0.1583, "step": 232875 }, { "epoch": 2.29, "grad_norm": 9.95225715637207, "learning_rate": 1.1588103448275862e-06, "loss": 0.0974, "step": 232900 }, { "epoch": 2.29, "grad_norm": 4.002606391906738, "learning_rate": 1.1583793103448277e-06, "loss": 0.149, "step": 232925 }, { "epoch": 2.29, "grad_norm": 10.862151145935059, "learning_rate": 1.1579482758620691e-06, "loss": 0.095, "step": 232950 }, { "epoch": 2.29, "grad_norm": 3.9988768100738525, "learning_rate": 1.1575172413793104e-06, "loss": 0.19, "step": 232975 }, { "epoch": 2.29, "grad_norm": 9.203450202941895, "learning_rate": 1.1570862068965518e-06, "loss": 0.0774, "step": 233000 }, { "epoch": 2.29, "grad_norm": 3.3074581623077393, "learning_rate": 1.1566551724137933e-06, "loss": 0.1618, "step": 233025 }, { "epoch": 2.29, "grad_norm": 8.211965560913086, "learning_rate": 1.1562241379310345e-06, "loss": 0.107, "step": 233050 }, { "epoch": 2.29, "grad_norm": 7.440793514251709, "learning_rate": 1.155793103448276e-06, "loss": 0.1821, "step": 233075 }, { "epoch": 2.29, "grad_norm": 18.414043426513672, "learning_rate": 1.1553620689655174e-06, "loss": 0.122, "step": 233100 }, { "epoch": 2.29, "grad_norm": 6.855574607849121, "learning_rate": 1.1549310344827587e-06, "loss": 0.1647, "step": 233125 }, { "epoch": 2.29, "grad_norm": 12.690994262695312, "learning_rate": 1.1545000000000001e-06, "loss": 0.1087, "step": 233150 }, { "epoch": 2.29, "grad_norm": 2.961153745651245, "learning_rate": 1.1540689655172414e-06, "loss": 0.1565, "step": 233175 }, { "epoch": 2.29, "grad_norm": 15.93073558807373, "learning_rate": 1.1536379310344828e-06, "loss": 0.092, "step": 233200 }, { "epoch": 2.29, "grad_norm": 5.229334831237793, "learning_rate": 1.1532068965517243e-06, "loss": 0.1519, "step": 233225 }, { "epoch": 2.29, "grad_norm": 8.385188102722168, "learning_rate": 1.1527758620689657e-06, "loss": 0.103, "step": 233250 }, { "epoch": 2.29, "grad_norm": 1.2387328147888184, "learning_rate": 1.152344827586207e-06, "loss": 0.1879, "step": 233275 }, { "epoch": 2.29, "grad_norm": 6.762309551239014, "learning_rate": 1.1519137931034484e-06, "loss": 0.0899, "step": 233300 }, { "epoch": 2.29, "grad_norm": 2.771003246307373, "learning_rate": 1.1514827586206897e-06, "loss": 0.1499, "step": 233325 }, { "epoch": 2.29, "grad_norm": 13.645498275756836, "learning_rate": 1.1510517241379311e-06, "loss": 0.0865, "step": 233350 }, { "epoch": 2.29, "grad_norm": 3.187594413757324, "learning_rate": 1.1506206896551724e-06, "loss": 0.1931, "step": 233375 }, { "epoch": 2.29, "grad_norm": 11.54175853729248, "learning_rate": 1.1501896551724138e-06, "loss": 0.0872, "step": 233400 }, { "epoch": 2.3, "grad_norm": 4.337911605834961, "learning_rate": 1.1497586206896553e-06, "loss": 0.165, "step": 233425 }, { "epoch": 2.3, "grad_norm": 10.470808982849121, "learning_rate": 1.1493275862068965e-06, "loss": 0.0882, "step": 233450 }, { "epoch": 2.3, "grad_norm": 3.515791177749634, "learning_rate": 1.148896551724138e-06, "loss": 0.1929, "step": 233475 }, { "epoch": 2.3, "grad_norm": 8.935258865356445, "learning_rate": 1.1484655172413795e-06, "loss": 0.0989, "step": 233500 }, { "epoch": 2.3, "grad_norm": 10.878894805908203, "learning_rate": 1.148034482758621e-06, "loss": 0.1611, "step": 233525 }, { "epoch": 2.3, "grad_norm": 16.020736694335938, "learning_rate": 1.1476034482758622e-06, "loss": 0.0961, "step": 233550 }, { "epoch": 2.3, "grad_norm": 4.113332748413086, "learning_rate": 1.1471724137931036e-06, "loss": 0.1524, "step": 233575 }, { "epoch": 2.3, "grad_norm": 11.977262496948242, "learning_rate": 1.1467413793103449e-06, "loss": 0.1012, "step": 233600 }, { "epoch": 2.3, "grad_norm": 3.324587106704712, "learning_rate": 1.1463103448275863e-06, "loss": 0.1405, "step": 233625 }, { "epoch": 2.3, "grad_norm": 5.842414379119873, "learning_rate": 1.1458793103448276e-06, "loss": 0.0674, "step": 233650 }, { "epoch": 2.3, "grad_norm": 1.9490481615066528, "learning_rate": 1.1454655172413794e-06, "loss": 0.1236, "step": 233675 }, { "epoch": 2.3, "grad_norm": 10.035049438476562, "learning_rate": 1.1450344827586207e-06, "loss": 0.0946, "step": 233700 }, { "epoch": 2.3, "grad_norm": 2.5482561588287354, "learning_rate": 1.1446034482758621e-06, "loss": 0.1628, "step": 233725 }, { "epoch": 2.3, "grad_norm": 35.25732421875, "learning_rate": 1.1441724137931036e-06, "loss": 0.0875, "step": 233750 }, { "epoch": 2.3, "grad_norm": 1.7740933895111084, "learning_rate": 1.143741379310345e-06, "loss": 0.1971, "step": 233775 }, { "epoch": 2.3, "grad_norm": 10.66073989868164, "learning_rate": 1.1433103448275863e-06, "loss": 0.0933, "step": 233800 }, { "epoch": 2.3, "grad_norm": 2.958409070968628, "learning_rate": 1.1428793103448278e-06, "loss": 0.2236, "step": 233825 }, { "epoch": 2.3, "grad_norm": 6.597765922546387, "learning_rate": 1.142448275862069e-06, "loss": 0.1333, "step": 233850 }, { "epoch": 2.3, "grad_norm": 9.542651176452637, "learning_rate": 1.1420172413793105e-06, "loss": 0.1849, "step": 233875 }, { "epoch": 2.3, "grad_norm": 3.3156769275665283, "learning_rate": 1.1415862068965517e-06, "loss": 0.0784, "step": 233900 }, { "epoch": 2.3, "grad_norm": 3.787269353866577, "learning_rate": 1.1411551724137932e-06, "loss": 0.1507, "step": 233925 }, { "epoch": 2.3, "grad_norm": 18.485877990722656, "learning_rate": 1.1407241379310344e-06, "loss": 0.1347, "step": 233950 }, { "epoch": 2.3, "grad_norm": 3.150498628616333, "learning_rate": 1.140293103448276e-06, "loss": 0.1534, "step": 233975 }, { "epoch": 2.3, "grad_norm": 8.95355224609375, "learning_rate": 1.1398620689655173e-06, "loss": 0.092, "step": 234000 }, { "epoch": 2.3, "grad_norm": 1.5322238206863403, "learning_rate": 1.1394310344827588e-06, "loss": 0.1324, "step": 234025 }, { "epoch": 2.3, "grad_norm": 17.017704010009766, "learning_rate": 1.139e-06, "loss": 0.0945, "step": 234050 }, { "epoch": 2.3, "grad_norm": 3.940702199935913, "learning_rate": 1.1385689655172415e-06, "loss": 0.2231, "step": 234075 }, { "epoch": 2.3, "grad_norm": 8.875381469726562, "learning_rate": 1.138137931034483e-06, "loss": 0.0841, "step": 234100 }, { "epoch": 2.3, "grad_norm": 6.878422737121582, "learning_rate": 1.1377068965517242e-06, "loss": 0.2123, "step": 234125 }, { "epoch": 2.3, "grad_norm": 14.269010543823242, "learning_rate": 1.1372758620689656e-06, "loss": 0.0808, "step": 234150 }, { "epoch": 2.3, "grad_norm": 8.101669311523438, "learning_rate": 1.1368448275862069e-06, "loss": 0.1808, "step": 234175 }, { "epoch": 2.3, "grad_norm": 10.906487464904785, "learning_rate": 1.1364137931034483e-06, "loss": 0.0888, "step": 234200 }, { "epoch": 2.3, "grad_norm": 4.732055187225342, "learning_rate": 1.1359827586206898e-06, "loss": 0.2134, "step": 234225 }, { "epoch": 2.3, "grad_norm": 10.303337097167969, "learning_rate": 1.1355517241379312e-06, "loss": 0.1113, "step": 234250 }, { "epoch": 2.3, "grad_norm": 5.492320537567139, "learning_rate": 1.1351206896551725e-06, "loss": 0.164, "step": 234275 }, { "epoch": 2.3, "grad_norm": 14.797457695007324, "learning_rate": 1.134689655172414e-06, "loss": 0.0972, "step": 234300 }, { "epoch": 2.3, "grad_norm": 5.928015232086182, "learning_rate": 1.1342586206896552e-06, "loss": 0.1691, "step": 234325 }, { "epoch": 2.3, "grad_norm": 10.272028923034668, "learning_rate": 1.1338275862068967e-06, "loss": 0.118, "step": 234350 }, { "epoch": 2.3, "grad_norm": 5.378414630889893, "learning_rate": 1.133396551724138e-06, "loss": 0.1807, "step": 234375 }, { "epoch": 2.3, "grad_norm": 11.970815658569336, "learning_rate": 1.1329655172413794e-06, "loss": 0.1202, "step": 234400 }, { "epoch": 2.3, "grad_norm": 3.5799973011016846, "learning_rate": 1.1325344827586208e-06, "loss": 0.2404, "step": 234425 }, { "epoch": 2.31, "grad_norm": 11.786253929138184, "learning_rate": 1.1321034482758623e-06, "loss": 0.0889, "step": 234450 }, { "epoch": 2.31, "grad_norm": 7.749617099761963, "learning_rate": 1.1316724137931035e-06, "loss": 0.147, "step": 234475 }, { "epoch": 2.31, "grad_norm": 14.580967903137207, "learning_rate": 1.131241379310345e-06, "loss": 0.091, "step": 234500 }, { "epoch": 2.31, "grad_norm": 3.8273494243621826, "learning_rate": 1.1308103448275862e-06, "loss": 0.1618, "step": 234525 }, { "epoch": 2.31, "grad_norm": 8.471150398254395, "learning_rate": 1.1303793103448277e-06, "loss": 0.101, "step": 234550 }, { "epoch": 2.31, "grad_norm": 0.8922187685966492, "learning_rate": 1.1299482758620691e-06, "loss": 0.1958, "step": 234575 }, { "epoch": 2.31, "grad_norm": 11.692744255065918, "learning_rate": 1.1295172413793104e-06, "loss": 0.0917, "step": 234600 }, { "epoch": 2.31, "grad_norm": 1.9625808000564575, "learning_rate": 1.1290862068965518e-06, "loss": 0.1633, "step": 234625 }, { "epoch": 2.31, "grad_norm": 9.771036148071289, "learning_rate": 1.128655172413793e-06, "loss": 0.0828, "step": 234650 }, { "epoch": 2.31, "grad_norm": 2.5836541652679443, "learning_rate": 1.1282241379310347e-06, "loss": 0.1517, "step": 234675 }, { "epoch": 2.31, "grad_norm": 6.8613176345825195, "learning_rate": 1.127793103448276e-06, "loss": 0.0812, "step": 234700 }, { "epoch": 2.31, "grad_norm": 5.282426357269287, "learning_rate": 1.1273620689655174e-06, "loss": 0.1762, "step": 234725 }, { "epoch": 2.31, "grad_norm": 8.184009552001953, "learning_rate": 1.1269310344827587e-06, "loss": 0.0699, "step": 234750 }, { "epoch": 2.31, "grad_norm": 2.5842370986938477, "learning_rate": 1.1265000000000001e-06, "loss": 0.17, "step": 234775 }, { "epoch": 2.31, "grad_norm": 11.203876495361328, "learning_rate": 1.1260689655172414e-06, "loss": 0.0882, "step": 234800 }, { "epoch": 2.31, "grad_norm": 3.2628960609436035, "learning_rate": 1.1256379310344828e-06, "loss": 0.1702, "step": 234825 }, { "epoch": 2.31, "grad_norm": 10.542951583862305, "learning_rate": 1.125206896551724e-06, "loss": 0.139, "step": 234850 }, { "epoch": 2.31, "grad_norm": 8.552316665649414, "learning_rate": 1.1247758620689655e-06, "loss": 0.1435, "step": 234875 }, { "epoch": 2.31, "grad_norm": 9.926080703735352, "learning_rate": 1.124344827586207e-06, "loss": 0.1109, "step": 234900 }, { "epoch": 2.31, "grad_norm": 3.558472156524658, "learning_rate": 1.1239137931034485e-06, "loss": 0.1921, "step": 234925 }, { "epoch": 2.31, "grad_norm": 10.452342987060547, "learning_rate": 1.1234827586206897e-06, "loss": 0.1056, "step": 234950 }, { "epoch": 2.31, "grad_norm": 3.920027494430542, "learning_rate": 1.1230517241379312e-06, "loss": 0.1798, "step": 234975 }, { "epoch": 2.31, "grad_norm": 9.242743492126465, "learning_rate": 1.1226206896551726e-06, "loss": 0.0667, "step": 235000 }, { "epoch": 2.31, "grad_norm": 0.3106132447719574, "learning_rate": 1.1221896551724139e-06, "loss": 0.1771, "step": 235025 }, { "epoch": 2.31, "grad_norm": 10.5948486328125, "learning_rate": 1.1217586206896553e-06, "loss": 0.114, "step": 235050 }, { "epoch": 2.31, "grad_norm": 5.372556209564209, "learning_rate": 1.1213275862068966e-06, "loss": 0.1632, "step": 235075 }, { "epoch": 2.31, "grad_norm": 8.20876693725586, "learning_rate": 1.120896551724138e-06, "loss": 0.0864, "step": 235100 }, { "epoch": 2.31, "grad_norm": 3.308737277984619, "learning_rate": 1.1204655172413793e-06, "loss": 0.1653, "step": 235125 }, { "epoch": 2.31, "grad_norm": 11.027634620666504, "learning_rate": 1.120034482758621e-06, "loss": 0.114, "step": 235150 }, { "epoch": 2.31, "grad_norm": 4.593695163726807, "learning_rate": 1.1196034482758622e-06, "loss": 0.1873, "step": 235175 }, { "epoch": 2.31, "grad_norm": 6.858786582946777, "learning_rate": 1.1191724137931036e-06, "loss": 0.0953, "step": 235200 }, { "epoch": 2.31, "grad_norm": 6.571032524108887, "learning_rate": 1.1187413793103449e-06, "loss": 0.1564, "step": 235225 }, { "epoch": 2.31, "grad_norm": 11.763498306274414, "learning_rate": 1.1183103448275863e-06, "loss": 0.1014, "step": 235250 }, { "epoch": 2.31, "grad_norm": 1.2782173156738281, "learning_rate": 1.1178793103448276e-06, "loss": 0.1717, "step": 235275 }, { "epoch": 2.31, "grad_norm": 13.064314842224121, "learning_rate": 1.117448275862069e-06, "loss": 0.1058, "step": 235300 }, { "epoch": 2.31, "grad_norm": 2.6564979553222656, "learning_rate": 1.1170172413793105e-06, "loss": 0.194, "step": 235325 }, { "epoch": 2.31, "grad_norm": 7.065001010894775, "learning_rate": 1.1165862068965517e-06, "loss": 0.097, "step": 235350 }, { "epoch": 2.31, "grad_norm": 7.538932800292969, "learning_rate": 1.1161551724137932e-06, "loss": 0.2105, "step": 235375 }, { "epoch": 2.31, "grad_norm": 8.662020683288574, "learning_rate": 1.1157241379310346e-06, "loss": 0.0824, "step": 235400 }, { "epoch": 2.31, "grad_norm": 2.5069491863250732, "learning_rate": 1.1152931034482759e-06, "loss": 0.1419, "step": 235425 }, { "epoch": 2.31, "grad_norm": 10.93923282623291, "learning_rate": 1.1148620689655173e-06, "loss": 0.1044, "step": 235450 }, { "epoch": 2.32, "grad_norm": 0.7136614918708801, "learning_rate": 1.1144310344827588e-06, "loss": 0.162, "step": 235475 }, { "epoch": 2.32, "grad_norm": 9.482246398925781, "learning_rate": 1.114e-06, "loss": 0.1002, "step": 235500 }, { "epoch": 2.32, "grad_norm": 6.289880752563477, "learning_rate": 1.1135689655172415e-06, "loss": 0.1632, "step": 235525 }, { "epoch": 2.32, "grad_norm": 16.125473022460938, "learning_rate": 1.1131379310344828e-06, "loss": 0.1048, "step": 235550 }, { "epoch": 2.32, "grad_norm": 3.1466896533966064, "learning_rate": 1.1127068965517242e-06, "loss": 0.1978, "step": 235575 }, { "epoch": 2.32, "grad_norm": 5.8035101890563965, "learning_rate": 1.1122758620689655e-06, "loss": 0.1189, "step": 235600 }, { "epoch": 2.32, "grad_norm": 1.6649233102798462, "learning_rate": 1.1118448275862071e-06, "loss": 0.1841, "step": 235625 }, { "epoch": 2.32, "grad_norm": 10.569869995117188, "learning_rate": 1.1114137931034484e-06, "loss": 0.1117, "step": 235650 }, { "epoch": 2.32, "grad_norm": 3.4602925777435303, "learning_rate": 1.1109827586206898e-06, "loss": 0.2239, "step": 235675 }, { "epoch": 2.32, "grad_norm": 11.557621002197266, "learning_rate": 1.110551724137931e-06, "loss": 0.1059, "step": 235700 }, { "epoch": 2.32, "grad_norm": 6.0512309074401855, "learning_rate": 1.110137931034483e-06, "loss": 0.2001, "step": 235725 }, { "epoch": 2.32, "grad_norm": 7.5153608322143555, "learning_rate": 1.1097068965517242e-06, "loss": 0.1002, "step": 235750 }, { "epoch": 2.32, "grad_norm": 9.250454902648926, "learning_rate": 1.1092758620689656e-06, "loss": 0.1635, "step": 235775 }, { "epoch": 2.32, "grad_norm": 12.464805603027344, "learning_rate": 1.108844827586207e-06, "loss": 0.0949, "step": 235800 }, { "epoch": 2.32, "grad_norm": 3.067535638809204, "learning_rate": 1.1084137931034484e-06, "loss": 0.1778, "step": 235825 }, { "epoch": 2.32, "grad_norm": 15.48173713684082, "learning_rate": 1.1079827586206896e-06, "loss": 0.0942, "step": 235850 }, { "epoch": 2.32, "grad_norm": 6.8924641609191895, "learning_rate": 1.1075517241379313e-06, "loss": 0.1842, "step": 235875 }, { "epoch": 2.32, "grad_norm": 9.662484169006348, "learning_rate": 1.1071206896551725e-06, "loss": 0.0869, "step": 235900 }, { "epoch": 2.32, "grad_norm": 0.25913044810295105, "learning_rate": 1.106689655172414e-06, "loss": 0.16, "step": 235925 }, { "epoch": 2.32, "grad_norm": 8.892420768737793, "learning_rate": 1.1062586206896552e-06, "loss": 0.0648, "step": 235950 }, { "epoch": 2.32, "grad_norm": 0.7361515760421753, "learning_rate": 1.1058275862068967e-06, "loss": 0.1804, "step": 235975 }, { "epoch": 2.32, "grad_norm": 6.25724983215332, "learning_rate": 1.105396551724138e-06, "loss": 0.0865, "step": 236000 }, { "epoch": 2.32, "grad_norm": 1.8654215335845947, "learning_rate": 1.1049655172413794e-06, "loss": 0.17, "step": 236025 }, { "epoch": 2.32, "grad_norm": 7.001698017120361, "learning_rate": 1.1045344827586208e-06, "loss": 0.0932, "step": 236050 }, { "epoch": 2.32, "grad_norm": 4.801675319671631, "learning_rate": 1.104103448275862e-06, "loss": 0.1767, "step": 236075 }, { "epoch": 2.32, "grad_norm": 8.354141235351562, "learning_rate": 1.1036724137931035e-06, "loss": 0.0909, "step": 236100 }, { "epoch": 2.32, "grad_norm": 5.137337684631348, "learning_rate": 1.103241379310345e-06, "loss": 0.186, "step": 236125 }, { "epoch": 2.32, "grad_norm": 10.311752319335938, "learning_rate": 1.1028103448275864e-06, "loss": 0.0976, "step": 236150 }, { "epoch": 2.32, "grad_norm": 3.90852689743042, "learning_rate": 1.1023793103448277e-06, "loss": 0.1718, "step": 236175 }, { "epoch": 2.32, "grad_norm": 10.508111953735352, "learning_rate": 1.1019482758620691e-06, "loss": 0.0921, "step": 236200 }, { "epoch": 2.32, "grad_norm": 4.570937633514404, "learning_rate": 1.1015172413793104e-06, "loss": 0.2657, "step": 236225 }, { "epoch": 2.32, "grad_norm": 5.691992282867432, "learning_rate": 1.1010862068965518e-06, "loss": 0.0938, "step": 236250 }, { "epoch": 2.32, "grad_norm": 0.8436453938484192, "learning_rate": 1.100655172413793e-06, "loss": 0.1818, "step": 236275 }, { "epoch": 2.32, "grad_norm": 4.458618640899658, "learning_rate": 1.1002241379310345e-06, "loss": 0.0845, "step": 236300 }, { "epoch": 2.32, "grad_norm": 2.3711791038513184, "learning_rate": 1.0997931034482758e-06, "loss": 0.1602, "step": 236325 }, { "epoch": 2.32, "grad_norm": 11.008706092834473, "learning_rate": 1.0993620689655175e-06, "loss": 0.0767, "step": 236350 }, { "epoch": 2.32, "grad_norm": 4.479106903076172, "learning_rate": 1.0989310344827587e-06, "loss": 0.1579, "step": 236375 }, { "epoch": 2.32, "grad_norm": 13.362431526184082, "learning_rate": 1.0985000000000002e-06, "loss": 0.0914, "step": 236400 }, { "epoch": 2.32, "grad_norm": 3.28025221824646, "learning_rate": 1.0980689655172414e-06, "loss": 0.1645, "step": 236425 }, { "epoch": 2.32, "grad_norm": 10.144447326660156, "learning_rate": 1.0976379310344829e-06, "loss": 0.1448, "step": 236450 }, { "epoch": 2.33, "grad_norm": 5.493442535400391, "learning_rate": 1.0972068965517243e-06, "loss": 0.1519, "step": 236475 }, { "epoch": 2.33, "grad_norm": 5.785346031188965, "learning_rate": 1.0967758620689656e-06, "loss": 0.0738, "step": 236500 }, { "epoch": 2.33, "grad_norm": 1.2136284112930298, "learning_rate": 1.096344827586207e-06, "loss": 0.1766, "step": 236525 }, { "epoch": 2.33, "grad_norm": 9.258548736572266, "learning_rate": 1.0959137931034483e-06, "loss": 0.0867, "step": 236550 }, { "epoch": 2.33, "grad_norm": 8.004472732543945, "learning_rate": 1.0954827586206897e-06, "loss": 0.1746, "step": 236575 }, { "epoch": 2.33, "grad_norm": 6.367832660675049, "learning_rate": 1.0950517241379312e-06, "loss": 0.0584, "step": 236600 }, { "epoch": 2.33, "grad_norm": 7.963356971740723, "learning_rate": 1.0946206896551726e-06, "loss": 0.181, "step": 236625 }, { "epoch": 2.33, "grad_norm": 13.830289840698242, "learning_rate": 1.0941896551724139e-06, "loss": 0.1062, "step": 236650 }, { "epoch": 2.33, "grad_norm": 2.954437732696533, "learning_rate": 1.0937586206896553e-06, "loss": 0.175, "step": 236675 }, { "epoch": 2.33, "grad_norm": 9.371757507324219, "learning_rate": 1.0933275862068966e-06, "loss": 0.0547, "step": 236700 }, { "epoch": 2.33, "grad_norm": 3.6138720512390137, "learning_rate": 1.092896551724138e-06, "loss": 0.1727, "step": 236725 }, { "epoch": 2.33, "grad_norm": 7.912764549255371, "learning_rate": 1.0924655172413793e-06, "loss": 0.0995, "step": 236750 }, { "epoch": 2.33, "grad_norm": 3.873110055923462, "learning_rate": 1.0920344827586207e-06, "loss": 0.1551, "step": 236775 }, { "epoch": 2.33, "grad_norm": 2.777174711227417, "learning_rate": 1.0916034482758622e-06, "loss": 0.0896, "step": 236800 }, { "epoch": 2.33, "grad_norm": 4.073988437652588, "learning_rate": 1.0911724137931036e-06, "loss": 0.1948, "step": 236825 }, { "epoch": 2.33, "grad_norm": 7.459394454956055, "learning_rate": 1.0907413793103449e-06, "loss": 0.0897, "step": 236850 }, { "epoch": 2.33, "grad_norm": 6.092846393585205, "learning_rate": 1.0903103448275863e-06, "loss": 0.1796, "step": 236875 }, { "epoch": 2.33, "grad_norm": 11.479205131530762, "learning_rate": 1.0898793103448276e-06, "loss": 0.1033, "step": 236900 }, { "epoch": 2.33, "grad_norm": 3.8992936611175537, "learning_rate": 1.089448275862069e-06, "loss": 0.1691, "step": 236925 }, { "epoch": 2.33, "grad_norm": 15.608899116516113, "learning_rate": 1.0890172413793105e-06, "loss": 0.102, "step": 236950 }, { "epoch": 2.33, "grad_norm": 5.966616630554199, "learning_rate": 1.0885862068965517e-06, "loss": 0.1866, "step": 236975 }, { "epoch": 2.33, "grad_norm": 13.384490966796875, "learning_rate": 1.0881551724137932e-06, "loss": 0.0919, "step": 237000 }, { "epoch": 2.33, "grad_norm": 4.719394683837891, "learning_rate": 1.0877241379310344e-06, "loss": 0.1572, "step": 237025 }, { "epoch": 2.33, "grad_norm": 35.99137878417969, "learning_rate": 1.0872931034482761e-06, "loss": 0.0851, "step": 237050 }, { "epoch": 2.33, "grad_norm": 1.014258623123169, "learning_rate": 1.0868620689655174e-06, "loss": 0.1767, "step": 237075 }, { "epoch": 2.33, "grad_norm": 7.366882801055908, "learning_rate": 1.0864310344827588e-06, "loss": 0.0977, "step": 237100 }, { "epoch": 2.33, "grad_norm": 0.26495105028152466, "learning_rate": 1.086e-06, "loss": 0.1757, "step": 237125 }, { "epoch": 2.33, "grad_norm": 7.246520042419434, "learning_rate": 1.0855689655172415e-06, "loss": 0.0843, "step": 237150 }, { "epoch": 2.33, "grad_norm": 4.22680139541626, "learning_rate": 1.0851379310344828e-06, "loss": 0.1797, "step": 237175 }, { "epoch": 2.33, "grad_norm": 11.518821716308594, "learning_rate": 1.0847068965517242e-06, "loss": 0.1121, "step": 237200 }, { "epoch": 2.33, "grad_norm": 3.309508800506592, "learning_rate": 1.0842758620689655e-06, "loss": 0.1714, "step": 237225 }, { "epoch": 2.33, "grad_norm": 11.826066017150879, "learning_rate": 1.083844827586207e-06, "loss": 0.1018, "step": 237250 }, { "epoch": 2.33, "grad_norm": 2.5464437007904053, "learning_rate": 1.0834137931034484e-06, "loss": 0.1867, "step": 237275 }, { "epoch": 2.33, "grad_norm": 14.90108585357666, "learning_rate": 1.0829827586206898e-06, "loss": 0.1032, "step": 237300 }, { "epoch": 2.33, "grad_norm": 4.432456970214844, "learning_rate": 1.082551724137931e-06, "loss": 0.1681, "step": 237325 }, { "epoch": 2.33, "grad_norm": 12.258447647094727, "learning_rate": 1.0821206896551725e-06, "loss": 0.1071, "step": 237350 }, { "epoch": 2.33, "grad_norm": 2.417950391769409, "learning_rate": 1.081689655172414e-06, "loss": 0.1722, "step": 237375 }, { "epoch": 2.33, "grad_norm": 9.247032165527344, "learning_rate": 1.0812586206896552e-06, "loss": 0.1322, "step": 237400 }, { "epoch": 2.33, "grad_norm": 3.941936731338501, "learning_rate": 1.0808275862068967e-06, "loss": 0.2013, "step": 237425 }, { "epoch": 2.33, "grad_norm": 5.999577522277832, "learning_rate": 1.080396551724138e-06, "loss": 0.0801, "step": 237450 }, { "epoch": 2.33, "grad_norm": 2.808879852294922, "learning_rate": 1.0799655172413794e-06, "loss": 0.1669, "step": 237475 }, { "epoch": 2.34, "grad_norm": 11.445396423339844, "learning_rate": 1.0795344827586206e-06, "loss": 0.111, "step": 237500 }, { "epoch": 2.34, "grad_norm": 5.7419843673706055, "learning_rate": 1.0791034482758623e-06, "loss": 0.1835, "step": 237525 }, { "epoch": 2.34, "grad_norm": 12.504044532775879, "learning_rate": 1.0786724137931036e-06, "loss": 0.0907, "step": 237550 }, { "epoch": 2.34, "grad_norm": 2.9041643142700195, "learning_rate": 1.078241379310345e-06, "loss": 0.2113, "step": 237575 }, { "epoch": 2.34, "grad_norm": 31.773611068725586, "learning_rate": 1.0778103448275863e-06, "loss": 0.1038, "step": 237600 }, { "epoch": 2.34, "grad_norm": 4.930767059326172, "learning_rate": 1.0773793103448277e-06, "loss": 0.1797, "step": 237625 }, { "epoch": 2.34, "grad_norm": 14.588335990905762, "learning_rate": 1.076948275862069e-06, "loss": 0.1006, "step": 237650 }, { "epoch": 2.34, "grad_norm": 2.672207832336426, "learning_rate": 1.0765172413793104e-06, "loss": 0.1851, "step": 237675 }, { "epoch": 2.34, "grad_norm": 14.020164489746094, "learning_rate": 1.0760862068965519e-06, "loss": 0.123, "step": 237700 }, { "epoch": 2.34, "grad_norm": 3.0780751705169678, "learning_rate": 1.0756724137931035e-06, "loss": 0.172, "step": 237725 }, { "epoch": 2.34, "grad_norm": 11.79980754852295, "learning_rate": 1.0752413793103448e-06, "loss": 0.1089, "step": 237750 }, { "epoch": 2.34, "grad_norm": 2.9355621337890625, "learning_rate": 1.0748103448275862e-06, "loss": 0.1754, "step": 237775 }, { "epoch": 2.34, "grad_norm": 8.98826789855957, "learning_rate": 1.0743793103448277e-06, "loss": 0.1023, "step": 237800 }, { "epoch": 2.34, "grad_norm": 1.630350947380066, "learning_rate": 1.0739482758620692e-06, "loss": 0.1913, "step": 237825 }, { "epoch": 2.34, "grad_norm": 8.05062484741211, "learning_rate": 1.0735172413793104e-06, "loss": 0.0931, "step": 237850 }, { "epoch": 2.34, "grad_norm": 2.5766730308532715, "learning_rate": 1.0730862068965519e-06, "loss": 0.1938, "step": 237875 }, { "epoch": 2.34, "grad_norm": 9.697030067443848, "learning_rate": 1.072655172413793e-06, "loss": 0.0919, "step": 237900 }, { "epoch": 2.34, "grad_norm": 5.940489768981934, "learning_rate": 1.0722241379310346e-06, "loss": 0.181, "step": 237925 }, { "epoch": 2.34, "grad_norm": 10.403553009033203, "learning_rate": 1.071793103448276e-06, "loss": 0.0875, "step": 237950 }, { "epoch": 2.34, "grad_norm": 4.19561243057251, "learning_rate": 1.0713620689655173e-06, "loss": 0.2179, "step": 237975 }, { "epoch": 2.34, "grad_norm": 8.691682815551758, "learning_rate": 1.0709310344827587e-06, "loss": 0.0909, "step": 238000 }, { "epoch": 2.34, "grad_norm": 6.014320373535156, "learning_rate": 1.0705000000000002e-06, "loss": 0.1891, "step": 238025 }, { "epoch": 2.34, "grad_norm": 14.098572731018066, "learning_rate": 1.0700689655172414e-06, "loss": 0.1018, "step": 238050 }, { "epoch": 2.34, "grad_norm": 2.9555881023406982, "learning_rate": 1.0696379310344829e-06, "loss": 0.2032, "step": 238075 }, { "epoch": 2.34, "grad_norm": 10.23123836517334, "learning_rate": 1.0692068965517243e-06, "loss": 0.0864, "step": 238100 }, { "epoch": 2.34, "grad_norm": 7.192529678344727, "learning_rate": 1.0687758620689656e-06, "loss": 0.1898, "step": 238125 }, { "epoch": 2.34, "grad_norm": 7.771280765533447, "learning_rate": 1.068344827586207e-06, "loss": 0.1252, "step": 238150 }, { "epoch": 2.34, "grad_norm": 4.7625837326049805, "learning_rate": 1.0679137931034483e-06, "loss": 0.1648, "step": 238175 }, { "epoch": 2.34, "grad_norm": 3.275712013244629, "learning_rate": 1.0674827586206897e-06, "loss": 0.1046, "step": 238200 }, { "epoch": 2.34, "grad_norm": 2.854083776473999, "learning_rate": 1.067051724137931e-06, "loss": 0.1899, "step": 238225 }, { "epoch": 2.34, "grad_norm": 14.275157928466797, "learning_rate": 1.0666206896551726e-06, "loss": 0.101, "step": 238250 }, { "epoch": 2.34, "grad_norm": 4.549721717834473, "learning_rate": 1.0661896551724139e-06, "loss": 0.1797, "step": 238275 }, { "epoch": 2.34, "grad_norm": 9.075417518615723, "learning_rate": 1.0657586206896553e-06, "loss": 0.0816, "step": 238300 }, { "epoch": 2.34, "grad_norm": 6.845101356506348, "learning_rate": 1.0653275862068966e-06, "loss": 0.1718, "step": 238325 }, { "epoch": 2.34, "grad_norm": 13.306537628173828, "learning_rate": 1.064896551724138e-06, "loss": 0.0857, "step": 238350 }, { "epoch": 2.34, "grad_norm": 5.272736072540283, "learning_rate": 1.0644655172413793e-06, "loss": 0.15, "step": 238375 }, { "epoch": 2.34, "grad_norm": 8.552332878112793, "learning_rate": 1.0640344827586207e-06, "loss": 0.0901, "step": 238400 }, { "epoch": 2.34, "grad_norm": 4.853752613067627, "learning_rate": 1.0636034482758622e-06, "loss": 0.1643, "step": 238425 }, { "epoch": 2.34, "grad_norm": 7.490541934967041, "learning_rate": 1.0631724137931034e-06, "loss": 0.0942, "step": 238450 }, { "epoch": 2.34, "grad_norm": 5.237271308898926, "learning_rate": 1.062741379310345e-06, "loss": 0.1777, "step": 238475 }, { "epoch": 2.34, "grad_norm": 7.09126615524292, "learning_rate": 1.0623103448275864e-06, "loss": 0.1167, "step": 238500 }, { "epoch": 2.35, "grad_norm": 7.491316318511963, "learning_rate": 1.0618793103448278e-06, "loss": 0.1655, "step": 238525 }, { "epoch": 2.35, "grad_norm": 8.754340171813965, "learning_rate": 1.061448275862069e-06, "loss": 0.0993, "step": 238550 }, { "epoch": 2.35, "grad_norm": 2.20416522026062, "learning_rate": 1.0610172413793105e-06, "loss": 0.1609, "step": 238575 }, { "epoch": 2.35, "grad_norm": 8.823692321777344, "learning_rate": 1.0605862068965518e-06, "loss": 0.1067, "step": 238600 }, { "epoch": 2.35, "grad_norm": 4.7817606925964355, "learning_rate": 1.0601551724137932e-06, "loss": 0.1791, "step": 238625 }, { "epoch": 2.35, "grad_norm": 7.112857818603516, "learning_rate": 1.0597241379310345e-06, "loss": 0.0834, "step": 238650 }, { "epoch": 2.35, "grad_norm": 3.8670899868011475, "learning_rate": 1.059293103448276e-06, "loss": 0.165, "step": 238675 }, { "epoch": 2.35, "grad_norm": 14.608867645263672, "learning_rate": 1.0588620689655174e-06, "loss": 0.1146, "step": 238700 }, { "epoch": 2.35, "grad_norm": 4.786595821380615, "learning_rate": 1.0584310344827588e-06, "loss": 0.1866, "step": 238725 }, { "epoch": 2.35, "grad_norm": 4.282232284545898, "learning_rate": 1.058e-06, "loss": 0.1164, "step": 238750 }, { "epoch": 2.35, "grad_norm": 7.399171352386475, "learning_rate": 1.0575689655172415e-06, "loss": 0.1408, "step": 238775 }, { "epoch": 2.35, "grad_norm": 8.896906852722168, "learning_rate": 1.0571379310344828e-06, "loss": 0.1278, "step": 238800 }, { "epoch": 2.35, "grad_norm": 2.884584903717041, "learning_rate": 1.0567068965517242e-06, "loss": 0.2009, "step": 238825 }, { "epoch": 2.35, "grad_norm": 22.269514083862305, "learning_rate": 1.0562758620689657e-06, "loss": 0.0933, "step": 238850 }, { "epoch": 2.35, "grad_norm": 4.1250433921813965, "learning_rate": 1.055844827586207e-06, "loss": 0.2, "step": 238875 }, { "epoch": 2.35, "grad_norm": 12.970697402954102, "learning_rate": 1.0554137931034484e-06, "loss": 0.1047, "step": 238900 }, { "epoch": 2.35, "grad_norm": 2.158930778503418, "learning_rate": 1.0549827586206896e-06, "loss": 0.1786, "step": 238925 }, { "epoch": 2.35, "grad_norm": 14.454095840454102, "learning_rate": 1.054551724137931e-06, "loss": 0.1037, "step": 238950 }, { "epoch": 2.35, "grad_norm": 3.6900382041931152, "learning_rate": 1.0541206896551725e-06, "loss": 0.2177, "step": 238975 }, { "epoch": 2.35, "grad_norm": 8.92093563079834, "learning_rate": 1.053689655172414e-06, "loss": 0.1237, "step": 239000 }, { "epoch": 2.35, "grad_norm": 4.347302436828613, "learning_rate": 1.0532586206896553e-06, "loss": 0.1864, "step": 239025 }, { "epoch": 2.35, "grad_norm": 6.034411430358887, "learning_rate": 1.0528275862068967e-06, "loss": 0.0896, "step": 239050 }, { "epoch": 2.35, "grad_norm": 1.4156006574630737, "learning_rate": 1.052396551724138e-06, "loss": 0.1579, "step": 239075 }, { "epoch": 2.35, "grad_norm": 8.263794898986816, "learning_rate": 1.0519655172413794e-06, "loss": 0.1165, "step": 239100 }, { "epoch": 2.35, "grad_norm": 5.205849647521973, "learning_rate": 1.0515344827586207e-06, "loss": 0.1969, "step": 239125 }, { "epoch": 2.35, "grad_norm": 6.498908519744873, "learning_rate": 1.0511034482758621e-06, "loss": 0.0822, "step": 239150 }, { "epoch": 2.35, "grad_norm": 4.642198085784912, "learning_rate": 1.0506724137931036e-06, "loss": 0.168, "step": 239175 }, { "epoch": 2.35, "grad_norm": 24.529998779296875, "learning_rate": 1.050241379310345e-06, "loss": 0.1043, "step": 239200 }, { "epoch": 2.35, "grad_norm": 2.8696699142456055, "learning_rate": 1.0498103448275863e-06, "loss": 0.1852, "step": 239225 }, { "epoch": 2.35, "grad_norm": 14.638116836547852, "learning_rate": 1.0493793103448277e-06, "loss": 0.1034, "step": 239250 }, { "epoch": 2.35, "grad_norm": 2.4861037731170654, "learning_rate": 1.048948275862069e-06, "loss": 0.1282, "step": 239275 }, { "epoch": 2.35, "grad_norm": 10.050103187561035, "learning_rate": 1.0485172413793104e-06, "loss": 0.1182, "step": 239300 }, { "epoch": 2.35, "grad_norm": 2.1974287033081055, "learning_rate": 1.0480862068965519e-06, "loss": 0.1614, "step": 239325 }, { "epoch": 2.35, "grad_norm": 9.965044975280762, "learning_rate": 1.0476551724137931e-06, "loss": 0.0908, "step": 239350 }, { "epoch": 2.35, "grad_norm": 1.2921972274780273, "learning_rate": 1.0472241379310346e-06, "loss": 0.1699, "step": 239375 }, { "epoch": 2.35, "grad_norm": 11.637674331665039, "learning_rate": 1.0467931034482758e-06, "loss": 0.072, "step": 239400 }, { "epoch": 2.35, "grad_norm": 5.5583367347717285, "learning_rate": 1.0463620689655173e-06, "loss": 0.1744, "step": 239425 }, { "epoch": 2.35, "grad_norm": 8.383329391479492, "learning_rate": 1.0459310344827587e-06, "loss": 0.1269, "step": 239450 }, { "epoch": 2.35, "grad_norm": 4.169589996337891, "learning_rate": 1.0455000000000002e-06, "loss": 0.1504, "step": 239475 }, { "epoch": 2.35, "grad_norm": 8.980579376220703, "learning_rate": 1.0450689655172414e-06, "loss": 0.0755, "step": 239500 }, { "epoch": 2.36, "grad_norm": 0.5174484252929688, "learning_rate": 1.044637931034483e-06, "loss": 0.1383, "step": 239525 }, { "epoch": 2.36, "grad_norm": 10.220207214355469, "learning_rate": 1.0442068965517241e-06, "loss": 0.0723, "step": 239550 }, { "epoch": 2.36, "grad_norm": 4.751698017120361, "learning_rate": 1.0437758620689656e-06, "loss": 0.1779, "step": 239575 }, { "epoch": 2.36, "grad_norm": 13.998199462890625, "learning_rate": 1.0433448275862068e-06, "loss": 0.0992, "step": 239600 }, { "epoch": 2.36, "grad_norm": 4.413160800933838, "learning_rate": 1.0429137931034483e-06, "loss": 0.1881, "step": 239625 }, { "epoch": 2.36, "grad_norm": 9.57178020477295, "learning_rate": 1.0424827586206898e-06, "loss": 0.0978, "step": 239650 }, { "epoch": 2.36, "grad_norm": 6.896992206573486, "learning_rate": 1.0420517241379312e-06, "loss": 0.1638, "step": 239675 }, { "epoch": 2.36, "grad_norm": 4.970482349395752, "learning_rate": 1.0416206896551725e-06, "loss": 0.0878, "step": 239700 }, { "epoch": 2.36, "grad_norm": 1.4015415906906128, "learning_rate": 1.041189655172414e-06, "loss": 0.1989, "step": 239725 }, { "epoch": 2.36, "grad_norm": 11.168560028076172, "learning_rate": 1.0407586206896554e-06, "loss": 0.1123, "step": 239750 }, { "epoch": 2.36, "grad_norm": 7.5137104988098145, "learning_rate": 1.0403275862068966e-06, "loss": 0.202, "step": 239775 }, { "epoch": 2.36, "grad_norm": 9.626458168029785, "learning_rate": 1.039896551724138e-06, "loss": 0.1172, "step": 239800 }, { "epoch": 2.36, "grad_norm": 3.560520648956299, "learning_rate": 1.0394655172413793e-06, "loss": 0.1872, "step": 239825 }, { "epoch": 2.36, "grad_norm": 15.780730247497559, "learning_rate": 1.0390344827586208e-06, "loss": 0.0938, "step": 239850 }, { "epoch": 2.36, "grad_norm": 5.499647617340088, "learning_rate": 1.038603448275862e-06, "loss": 0.1334, "step": 239875 }, { "epoch": 2.36, "grad_norm": 5.3176069259643555, "learning_rate": 1.0381724137931037e-06, "loss": 0.0765, "step": 239900 }, { "epoch": 2.36, "grad_norm": 2.400089740753174, "learning_rate": 1.0377586206896551e-06, "loss": 0.1692, "step": 239925 }, { "epoch": 2.36, "grad_norm": 10.369872093200684, "learning_rate": 1.0373275862068966e-06, "loss": 0.0891, "step": 239950 }, { "epoch": 2.36, "grad_norm": 5.769360065460205, "learning_rate": 1.036896551724138e-06, "loss": 0.1721, "step": 239975 }, { "epoch": 2.36, "grad_norm": 16.230548858642578, "learning_rate": 1.0364655172413795e-06, "loss": 0.1175, "step": 240000 }, { "epoch": 2.36, "eval_loss": 0.6098799705505371, "eval_runtime": 5905.0633, "eval_samples_per_second": 1.603, "eval_steps_per_second": 0.201, "eval_wer": 0.11742741260542072, "step": 240000 }, { "epoch": 2.36, "grad_norm": 1.1732983589172363, "learning_rate": 1.0360344827586208e-06, "loss": 0.1838, "step": 240025 }, { "epoch": 2.36, "grad_norm": 13.092979431152344, "learning_rate": 1.0356034482758622e-06, "loss": 0.1021, "step": 240050 }, { "epoch": 2.36, "grad_norm": 3.976050853729248, "learning_rate": 1.0351724137931035e-06, "loss": 0.2051, "step": 240075 }, { "epoch": 2.36, "grad_norm": 13.596699714660645, "learning_rate": 1.034741379310345e-06, "loss": 0.0743, "step": 240100 }, { "epoch": 2.36, "grad_norm": 4.201897621154785, "learning_rate": 1.0343103448275862e-06, "loss": 0.1946, "step": 240125 }, { "epoch": 2.36, "grad_norm": 9.769874572753906, "learning_rate": 1.0338793103448276e-06, "loss": 0.0923, "step": 240150 }, { "epoch": 2.36, "grad_norm": 3.961540460586548, "learning_rate": 1.033448275862069e-06, "loss": 0.223, "step": 240175 }, { "epoch": 2.36, "grad_norm": 4.506981372833252, "learning_rate": 1.0330172413793105e-06, "loss": 0.0682, "step": 240200 }, { "epoch": 2.36, "grad_norm": 3.3473663330078125, "learning_rate": 1.0325862068965518e-06, "loss": 0.172, "step": 240225 }, { "epoch": 2.36, "grad_norm": 14.79516887664795, "learning_rate": 1.0321551724137932e-06, "loss": 0.1241, "step": 240250 }, { "epoch": 2.36, "grad_norm": 2.94423508644104, "learning_rate": 1.0317241379310345e-06, "loss": 0.1795, "step": 240275 }, { "epoch": 2.36, "grad_norm": 9.34498119354248, "learning_rate": 1.031293103448276e-06, "loss": 0.0905, "step": 240300 }, { "epoch": 2.36, "grad_norm": 2.7903425693511963, "learning_rate": 1.0308620689655174e-06, "loss": 0.1499, "step": 240325 }, { "epoch": 2.36, "grad_norm": 8.621875762939453, "learning_rate": 1.0304310344827586e-06, "loss": 0.062, "step": 240350 }, { "epoch": 2.36, "grad_norm": 10.132418632507324, "learning_rate": 1.03e-06, "loss": 0.1651, "step": 240375 }, { "epoch": 2.36, "grad_norm": 6.831372261047363, "learning_rate": 1.0295689655172415e-06, "loss": 0.0634, "step": 240400 }, { "epoch": 2.36, "grad_norm": 2.134843111038208, "learning_rate": 1.0291379310344828e-06, "loss": 0.1913, "step": 240425 }, { "epoch": 2.36, "grad_norm": 10.958087921142578, "learning_rate": 1.0287068965517242e-06, "loss": 0.1052, "step": 240450 }, { "epoch": 2.36, "grad_norm": 1.89592707157135, "learning_rate": 1.0282758620689657e-06, "loss": 0.1792, "step": 240475 }, { "epoch": 2.36, "grad_norm": 9.266828536987305, "learning_rate": 1.027844827586207e-06, "loss": 0.0912, "step": 240500 }, { "epoch": 2.36, "grad_norm": 2.1046817302703857, "learning_rate": 1.0274137931034484e-06, "loss": 0.1968, "step": 240525 }, { "epoch": 2.37, "grad_norm": 4.490945339202881, "learning_rate": 1.0269827586206897e-06, "loss": 0.0798, "step": 240550 }, { "epoch": 2.37, "grad_norm": 5.513803958892822, "learning_rate": 1.0265517241379311e-06, "loss": 0.1553, "step": 240575 }, { "epoch": 2.37, "grad_norm": 9.1251802444458, "learning_rate": 1.0261206896551724e-06, "loss": 0.0858, "step": 240600 }, { "epoch": 2.37, "grad_norm": 5.5231404304504395, "learning_rate": 1.0256896551724138e-06, "loss": 0.1627, "step": 240625 }, { "epoch": 2.37, "grad_norm": 11.355608940124512, "learning_rate": 1.0252586206896553e-06, "loss": 0.0888, "step": 240650 }, { "epoch": 2.37, "grad_norm": 6.307230472564697, "learning_rate": 1.0248275862068967e-06, "loss": 0.1487, "step": 240675 }, { "epoch": 2.37, "grad_norm": 6.990225315093994, "learning_rate": 1.024396551724138e-06, "loss": 0.0971, "step": 240700 }, { "epoch": 2.37, "grad_norm": 0.6546413898468018, "learning_rate": 1.0239655172413794e-06, "loss": 0.1849, "step": 240725 }, { "epoch": 2.37, "grad_norm": 0.969723641872406, "learning_rate": 1.0235344827586209e-06, "loss": 0.1048, "step": 240750 }, { "epoch": 2.37, "grad_norm": 4.553122520446777, "learning_rate": 1.0231034482758621e-06, "loss": 0.2017, "step": 240775 }, { "epoch": 2.37, "grad_norm": 11.316327095031738, "learning_rate": 1.0226724137931036e-06, "loss": 0.0859, "step": 240800 }, { "epoch": 2.37, "grad_norm": 6.048890590667725, "learning_rate": 1.0222413793103448e-06, "loss": 0.1944, "step": 240825 }, { "epoch": 2.37, "grad_norm": 9.188263893127441, "learning_rate": 1.0218103448275863e-06, "loss": 0.0975, "step": 240850 }, { "epoch": 2.37, "grad_norm": 0.32647231221199036, "learning_rate": 1.0213793103448277e-06, "loss": 0.1563, "step": 240875 }, { "epoch": 2.37, "grad_norm": 14.603327751159668, "learning_rate": 1.0209482758620692e-06, "loss": 0.0896, "step": 240900 }, { "epoch": 2.37, "grad_norm": 2.558224678039551, "learning_rate": 1.0205344827586209e-06, "loss": 0.1731, "step": 240925 }, { "epoch": 2.37, "grad_norm": 11.452211380004883, "learning_rate": 1.0201034482758621e-06, "loss": 0.0896, "step": 240950 }, { "epoch": 2.37, "grad_norm": 4.915871620178223, "learning_rate": 1.0196724137931036e-06, "loss": 0.1799, "step": 240975 }, { "epoch": 2.37, "grad_norm": 12.56318187713623, "learning_rate": 1.019241379310345e-06, "loss": 0.0931, "step": 241000 }, { "epoch": 2.37, "grad_norm": 5.179165840148926, "learning_rate": 1.0188103448275863e-06, "loss": 0.1839, "step": 241025 }, { "epoch": 2.37, "grad_norm": 7.567038059234619, "learning_rate": 1.0183793103448277e-06, "loss": 0.0969, "step": 241050 }, { "epoch": 2.37, "grad_norm": 3.028561592102051, "learning_rate": 1.017948275862069e-06, "loss": 0.1813, "step": 241075 }, { "epoch": 2.37, "grad_norm": 18.67570686340332, "learning_rate": 1.0175172413793104e-06, "loss": 0.1345, "step": 241100 }, { "epoch": 2.37, "grad_norm": 6.553493022918701, "learning_rate": 1.0170862068965517e-06, "loss": 0.1681, "step": 241125 }, { "epoch": 2.37, "grad_norm": 15.060640335083008, "learning_rate": 1.0166551724137933e-06, "loss": 0.1128, "step": 241150 }, { "epoch": 2.37, "grad_norm": 6.061211109161377, "learning_rate": 1.0162241379310346e-06, "loss": 0.188, "step": 241175 }, { "epoch": 2.37, "grad_norm": 5.779300689697266, "learning_rate": 1.015793103448276e-06, "loss": 0.0969, "step": 241200 }, { "epoch": 2.37, "grad_norm": 4.612705707550049, "learning_rate": 1.0153620689655173e-06, "loss": 0.1976, "step": 241225 }, { "epoch": 2.37, "grad_norm": 3.3513906002044678, "learning_rate": 1.0149310344827587e-06, "loss": 0.0832, "step": 241250 }, { "epoch": 2.37, "grad_norm": 5.83075475692749, "learning_rate": 1.0145e-06, "loss": 0.1674, "step": 241275 }, { "epoch": 2.37, "grad_norm": 12.395963668823242, "learning_rate": 1.0140689655172414e-06, "loss": 0.1186, "step": 241300 }, { "epoch": 2.37, "grad_norm": 1.3303611278533936, "learning_rate": 1.013637931034483e-06, "loss": 0.1558, "step": 241325 }, { "epoch": 2.37, "grad_norm": 7.69815731048584, "learning_rate": 1.0132068965517241e-06, "loss": 0.0641, "step": 241350 }, { "epoch": 2.37, "grad_norm": 7.363772392272949, "learning_rate": 1.0127758620689656e-06, "loss": 0.1549, "step": 241375 }, { "epoch": 2.37, "grad_norm": 4.937288284301758, "learning_rate": 1.012344827586207e-06, "loss": 0.0982, "step": 241400 }, { "epoch": 2.37, "grad_norm": 3.6746561527252197, "learning_rate": 1.0119137931034483e-06, "loss": 0.1762, "step": 241425 }, { "epoch": 2.37, "grad_norm": 11.541569709777832, "learning_rate": 1.0114827586206898e-06, "loss": 0.0933, "step": 241450 }, { "epoch": 2.37, "grad_norm": 3.7834887504577637, "learning_rate": 1.0110517241379312e-06, "loss": 0.1934, "step": 241475 }, { "epoch": 2.37, "grad_norm": 5.839188098907471, "learning_rate": 1.0106206896551725e-06, "loss": 0.0953, "step": 241500 }, { "epoch": 2.37, "grad_norm": 2.2436625957489014, "learning_rate": 1.010189655172414e-06, "loss": 0.1824, "step": 241525 }, { "epoch": 2.37, "grad_norm": 12.237743377685547, "learning_rate": 1.0097586206896552e-06, "loss": 0.1232, "step": 241550 }, { "epoch": 2.38, "grad_norm": 5.1611504554748535, "learning_rate": 1.0093275862068966e-06, "loss": 0.1447, "step": 241575 }, { "epoch": 2.38, "grad_norm": 18.628786087036133, "learning_rate": 1.0088965517241379e-06, "loss": 0.1145, "step": 241600 }, { "epoch": 2.38, "grad_norm": 5.512409687042236, "learning_rate": 1.0084655172413795e-06, "loss": 0.1905, "step": 241625 }, { "epoch": 2.38, "grad_norm": 14.445805549621582, "learning_rate": 1.0080344827586208e-06, "loss": 0.1146, "step": 241650 }, { "epoch": 2.38, "grad_norm": 5.931060314178467, "learning_rate": 1.0076034482758622e-06, "loss": 0.1805, "step": 241675 }, { "epoch": 2.38, "grad_norm": 8.538457870483398, "learning_rate": 1.0071724137931035e-06, "loss": 0.1049, "step": 241700 }, { "epoch": 2.38, "grad_norm": 4.460370063781738, "learning_rate": 1.006741379310345e-06, "loss": 0.1576, "step": 241725 }, { "epoch": 2.38, "grad_norm": 8.805161476135254, "learning_rate": 1.0063103448275862e-06, "loss": 0.0856, "step": 241750 }, { "epoch": 2.38, "grad_norm": 6.2549309730529785, "learning_rate": 1.0058793103448276e-06, "loss": 0.205, "step": 241775 }, { "epoch": 2.38, "grad_norm": 5.991171360015869, "learning_rate": 1.005448275862069e-06, "loss": 0.0978, "step": 241800 }, { "epoch": 2.38, "grad_norm": 9.118733406066895, "learning_rate": 1.0050172413793103e-06, "loss": 0.1884, "step": 241825 }, { "epoch": 2.38, "grad_norm": 10.573347091674805, "learning_rate": 1.0045862068965518e-06, "loss": 0.0799, "step": 241850 }, { "epoch": 2.38, "grad_norm": 6.15659236907959, "learning_rate": 1.0041551724137932e-06, "loss": 0.1519, "step": 241875 }, { "epoch": 2.38, "grad_norm": 12.18627643585205, "learning_rate": 1.0037241379310347e-06, "loss": 0.1263, "step": 241900 }, { "epoch": 2.38, "grad_norm": 6.982051372528076, "learning_rate": 1.003293103448276e-06, "loss": 0.1701, "step": 241925 }, { "epoch": 2.38, "grad_norm": 9.825786590576172, "learning_rate": 1.0028620689655174e-06, "loss": 0.0688, "step": 241950 }, { "epoch": 2.38, "grad_norm": 4.253717422485352, "learning_rate": 1.0024310344827586e-06, "loss": 0.1586, "step": 241975 }, { "epoch": 2.38, "grad_norm": 9.25966739654541, "learning_rate": 1.002e-06, "loss": 0.0919, "step": 242000 }, { "epoch": 2.38, "grad_norm": 6.352293014526367, "learning_rate": 1.0015689655172413e-06, "loss": 0.1567, "step": 242025 }, { "epoch": 2.38, "grad_norm": 7.308487415313721, "learning_rate": 1.0011379310344828e-06, "loss": 0.107, "step": 242050 }, { "epoch": 2.38, "grad_norm": 4.152288913726807, "learning_rate": 1.0007068965517243e-06, "loss": 0.1895, "step": 242075 }, { "epoch": 2.38, "grad_norm": 6.175613880157471, "learning_rate": 1.0002758620689657e-06, "loss": 0.0786, "step": 242100 }, { "epoch": 2.38, "grad_norm": 2.9726147651672363, "learning_rate": 9.99844827586207e-07, "loss": 0.2074, "step": 242125 }, { "epoch": 2.38, "grad_norm": 10.021071434020996, "learning_rate": 9.994137931034484e-07, "loss": 0.0803, "step": 242150 }, { "epoch": 2.38, "grad_norm": 3.7731821537017822, "learning_rate": 9.989827586206897e-07, "loss": 0.1473, "step": 242175 }, { "epoch": 2.38, "grad_norm": 12.301365852355957, "learning_rate": 9.985517241379311e-07, "loss": 0.0942, "step": 242200 }, { "epoch": 2.38, "grad_norm": 5.744278907775879, "learning_rate": 9.981206896551726e-07, "loss": 0.1822, "step": 242225 }, { "epoch": 2.38, "grad_norm": 11.446584701538086, "learning_rate": 9.976896551724138e-07, "loss": 0.1012, "step": 242250 }, { "epoch": 2.38, "grad_norm": 4.959374904632568, "learning_rate": 9.972586206896553e-07, "loss": 0.1649, "step": 242275 }, { "epoch": 2.38, "grad_norm": 6.940905570983887, "learning_rate": 9.968275862068965e-07, "loss": 0.1249, "step": 242300 }, { "epoch": 2.38, "grad_norm": 2.582655668258667, "learning_rate": 9.96396551724138e-07, "loss": 0.1622, "step": 242325 }, { "epoch": 2.38, "grad_norm": 10.950953483581543, "learning_rate": 9.959655172413794e-07, "loss": 0.1108, "step": 242350 }, { "epoch": 2.38, "grad_norm": 6.35279655456543, "learning_rate": 9.955344827586209e-07, "loss": 0.1719, "step": 242375 }, { "epoch": 2.38, "grad_norm": 18.92183494567871, "learning_rate": 9.951034482758621e-07, "loss": 0.0892, "step": 242400 }, { "epoch": 2.38, "grad_norm": 1.5348845720291138, "learning_rate": 9.946724137931036e-07, "loss": 0.1283, "step": 242425 }, { "epoch": 2.38, "grad_norm": 7.749289035797119, "learning_rate": 9.942413793103448e-07, "loss": 0.0909, "step": 242450 }, { "epoch": 2.38, "grad_norm": 6.63970947265625, "learning_rate": 9.938103448275863e-07, "loss": 0.2126, "step": 242475 }, { "epoch": 2.38, "grad_norm": 13.255653381347656, "learning_rate": 9.933793103448275e-07, "loss": 0.0832, "step": 242500 }, { "epoch": 2.38, "grad_norm": 5.847785949707031, "learning_rate": 9.92948275862069e-07, "loss": 0.2056, "step": 242525 }, { "epoch": 2.38, "grad_norm": 12.233731269836426, "learning_rate": 9.925172413793105e-07, "loss": 0.0755, "step": 242550 }, { "epoch": 2.39, "grad_norm": 3.462759017944336, "learning_rate": 9.92086206896552e-07, "loss": 0.1801, "step": 242575 }, { "epoch": 2.39, "grad_norm": 5.3533935546875, "learning_rate": 9.916551724137932e-07, "loss": 0.0744, "step": 242600 }, { "epoch": 2.39, "grad_norm": 6.637218952178955, "learning_rate": 9.912241379310346e-07, "loss": 0.1953, "step": 242625 }, { "epoch": 2.39, "grad_norm": 14.751591682434082, "learning_rate": 9.907931034482759e-07, "loss": 0.0892, "step": 242650 }, { "epoch": 2.39, "grad_norm": 3.1620090007781982, "learning_rate": 9.903620689655173e-07, "loss": 0.2041, "step": 242675 }, { "epoch": 2.39, "grad_norm": 13.96461009979248, "learning_rate": 9.899310344827588e-07, "loss": 0.0848, "step": 242700 }, { "epoch": 2.39, "grad_norm": 4.0572710037231445, "learning_rate": 9.895e-07, "loss": 0.1816, "step": 242725 }, { "epoch": 2.39, "grad_norm": 13.224246978759766, "learning_rate": 9.890689655172415e-07, "loss": 0.1136, "step": 242750 }, { "epoch": 2.39, "grad_norm": 7.532046318054199, "learning_rate": 9.886379310344827e-07, "loss": 0.2377, "step": 242775 }, { "epoch": 2.39, "grad_norm": 9.911016464233398, "learning_rate": 9.882068965517242e-07, "loss": 0.1118, "step": 242800 }, { "epoch": 2.39, "grad_norm": 0.28123152256011963, "learning_rate": 9.877758620689656e-07, "loss": 0.1751, "step": 242825 }, { "epoch": 2.39, "grad_norm": 11.616909980773926, "learning_rate": 9.87344827586207e-07, "loss": 0.0901, "step": 242850 }, { "epoch": 2.39, "grad_norm": 4.0213518142700195, "learning_rate": 9.869137931034483e-07, "loss": 0.2021, "step": 242875 }, { "epoch": 2.39, "grad_norm": 7.659409999847412, "learning_rate": 9.864827586206898e-07, "loss": 0.1137, "step": 242900 }, { "epoch": 2.39, "grad_norm": 6.8491740226745605, "learning_rate": 9.86051724137931e-07, "loss": 0.1955, "step": 242925 }, { "epoch": 2.39, "grad_norm": 9.654189109802246, "learning_rate": 9.856206896551725e-07, "loss": 0.1076, "step": 242950 }, { "epoch": 2.39, "grad_norm": 9.17475700378418, "learning_rate": 9.851896551724137e-07, "loss": 0.1903, "step": 242975 }, { "epoch": 2.39, "grad_norm": 10.16565990447998, "learning_rate": 9.847586206896552e-07, "loss": 0.0777, "step": 243000 }, { "epoch": 2.39, "grad_norm": 0.8773708343505859, "learning_rate": 9.843275862068966e-07, "loss": 0.17, "step": 243025 }, { "epoch": 2.39, "grad_norm": 8.47010612487793, "learning_rate": 9.83896551724138e-07, "loss": 0.0898, "step": 243050 }, { "epoch": 2.39, "grad_norm": 3.1231930255889893, "learning_rate": 9.834655172413793e-07, "loss": 0.1667, "step": 243075 }, { "epoch": 2.39, "grad_norm": 11.303363800048828, "learning_rate": 9.830344827586208e-07, "loss": 0.0819, "step": 243100 }, { "epoch": 2.39, "grad_norm": 2.5325019359588623, "learning_rate": 9.826034482758623e-07, "loss": 0.1566, "step": 243125 }, { "epoch": 2.39, "grad_norm": 10.900447845458984, "learning_rate": 9.821724137931035e-07, "loss": 0.1012, "step": 243150 }, { "epoch": 2.39, "grad_norm": 6.527733325958252, "learning_rate": 9.81741379310345e-07, "loss": 0.2177, "step": 243175 }, { "epoch": 2.39, "grad_norm": 10.351731300354004, "learning_rate": 9.813103448275862e-07, "loss": 0.1187, "step": 243200 }, { "epoch": 2.39, "grad_norm": 10.3812894821167, "learning_rate": 9.808793103448277e-07, "loss": 0.2008, "step": 243225 }, { "epoch": 2.39, "grad_norm": 4.805160045623779, "learning_rate": 9.80448275862069e-07, "loss": 0.0717, "step": 243250 }, { "epoch": 2.39, "grad_norm": 1.8305529356002808, "learning_rate": 9.800172413793106e-07, "loss": 0.1719, "step": 243275 }, { "epoch": 2.39, "grad_norm": 9.827518463134766, "learning_rate": 9.795862068965518e-07, "loss": 0.0784, "step": 243300 }, { "epoch": 2.39, "grad_norm": 3.5516061782836914, "learning_rate": 9.791551724137933e-07, "loss": 0.1873, "step": 243325 }, { "epoch": 2.39, "grad_norm": 10.320033073425293, "learning_rate": 9.787241379310345e-07, "loss": 0.0968, "step": 243350 }, { "epoch": 2.39, "grad_norm": 4.721782684326172, "learning_rate": 9.78293103448276e-07, "loss": 0.1703, "step": 243375 }, { "epoch": 2.39, "grad_norm": 11.477081298828125, "learning_rate": 9.778620689655172e-07, "loss": 0.1306, "step": 243400 }, { "epoch": 2.39, "grad_norm": 4.816943168640137, "learning_rate": 9.774310344827587e-07, "loss": 0.1626, "step": 243425 }, { "epoch": 2.39, "grad_norm": 24.051651000976562, "learning_rate": 9.770000000000001e-07, "loss": 0.1343, "step": 243450 }, { "epoch": 2.39, "grad_norm": 4.002688407897949, "learning_rate": 9.765689655172414e-07, "loss": 0.1704, "step": 243475 }, { "epoch": 2.39, "grad_norm": 10.62880802154541, "learning_rate": 9.761379310344828e-07, "loss": 0.1024, "step": 243500 }, { "epoch": 2.39, "grad_norm": 2.800367593765259, "learning_rate": 9.757068965517243e-07, "loss": 0.1648, "step": 243525 }, { "epoch": 2.39, "grad_norm": 10.13687801361084, "learning_rate": 9.752758620689655e-07, "loss": 0.1004, "step": 243550 }, { "epoch": 2.39, "grad_norm": 3.813633918762207, "learning_rate": 9.74844827586207e-07, "loss": 0.1717, "step": 243575 }, { "epoch": 2.4, "grad_norm": 19.088829040527344, "learning_rate": 9.744137931034484e-07, "loss": 0.0878, "step": 243600 }, { "epoch": 2.4, "grad_norm": 4.474357604980469, "learning_rate": 9.739827586206897e-07, "loss": 0.1791, "step": 243625 }, { "epoch": 2.4, "grad_norm": 12.350476264953613, "learning_rate": 9.735517241379311e-07, "loss": 0.0928, "step": 243650 }, { "epoch": 2.4, "grad_norm": 3.339872360229492, "learning_rate": 9.731206896551724e-07, "loss": 0.1722, "step": 243675 }, { "epoch": 2.4, "grad_norm": 6.81261682510376, "learning_rate": 9.726896551724139e-07, "loss": 0.1122, "step": 243700 }, { "epoch": 2.4, "grad_norm": 3.6767678260803223, "learning_rate": 9.722586206896553e-07, "loss": 0.1756, "step": 243725 }, { "epoch": 2.4, "grad_norm": 12.027571678161621, "learning_rate": 9.718275862068968e-07, "loss": 0.095, "step": 243750 }, { "epoch": 2.4, "grad_norm": 4.742911338806152, "learning_rate": 9.71396551724138e-07, "loss": 0.1531, "step": 243775 }, { "epoch": 2.4, "grad_norm": 5.292638301849365, "learning_rate": 9.709655172413795e-07, "loss": 0.1082, "step": 243800 }, { "epoch": 2.4, "grad_norm": 2.733100175857544, "learning_rate": 9.705344827586207e-07, "loss": 0.1933, "step": 243825 }, { "epoch": 2.4, "grad_norm": 9.897337913513184, "learning_rate": 9.701034482758622e-07, "loss": 0.1193, "step": 243850 }, { "epoch": 2.4, "grad_norm": 1.9384994506835938, "learning_rate": 9.696724137931034e-07, "loss": 0.1726, "step": 243875 }, { "epoch": 2.4, "grad_norm": 5.170419692993164, "learning_rate": 9.692413793103449e-07, "loss": 0.0929, "step": 243900 }, { "epoch": 2.4, "grad_norm": 6.854851722717285, "learning_rate": 9.688103448275863e-07, "loss": 0.1778, "step": 243925 }, { "epoch": 2.4, "grad_norm": 5.572196960449219, "learning_rate": 9.683793103448276e-07, "loss": 0.093, "step": 243950 }, { "epoch": 2.4, "grad_norm": 7.3272271156311035, "learning_rate": 9.67948275862069e-07, "loss": 0.1603, "step": 243975 }, { "epoch": 2.4, "grad_norm": 6.34375524520874, "learning_rate": 9.675172413793105e-07, "loss": 0.1153, "step": 244000 }, { "epoch": 2.4, "grad_norm": 3.830690383911133, "learning_rate": 9.67086206896552e-07, "loss": 0.1671, "step": 244025 }, { "epoch": 2.4, "grad_norm": 7.3639960289001465, "learning_rate": 9.666551724137932e-07, "loss": 0.0772, "step": 244050 }, { "epoch": 2.4, "grad_norm": 7.393798351287842, "learning_rate": 9.662241379310346e-07, "loss": 0.1801, "step": 244075 }, { "epoch": 2.4, "grad_norm": 10.10035228729248, "learning_rate": 9.657931034482759e-07, "loss": 0.0788, "step": 244100 }, { "epoch": 2.4, "grad_norm": 3.122570514678955, "learning_rate": 9.653620689655173e-07, "loss": 0.2146, "step": 244125 }, { "epoch": 2.4, "grad_norm": 12.716325759887695, "learning_rate": 9.649310344827586e-07, "loss": 0.1025, "step": 244150 }, { "epoch": 2.4, "grad_norm": 3.852299213409424, "learning_rate": 9.645e-07, "loss": 0.1844, "step": 244175 }, { "epoch": 2.4, "grad_norm": 12.697093963623047, "learning_rate": 9.640689655172415e-07, "loss": 0.0793, "step": 244200 }, { "epoch": 2.4, "grad_norm": 2.3511180877685547, "learning_rate": 9.63637931034483e-07, "loss": 0.2137, "step": 244225 }, { "epoch": 2.4, "grad_norm": 14.071741104125977, "learning_rate": 9.632068965517242e-07, "loss": 0.1002, "step": 244250 }, { "epoch": 2.4, "grad_norm": 4.075435161590576, "learning_rate": 9.627758620689657e-07, "loss": 0.1778, "step": 244275 }, { "epoch": 2.4, "grad_norm": 3.7190446853637695, "learning_rate": 9.62344827586207e-07, "loss": 0.1065, "step": 244300 }, { "epoch": 2.4, "grad_norm": 0.1769591122865677, "learning_rate": 9.619137931034484e-07, "loss": 0.1672, "step": 244325 }, { "epoch": 2.4, "grad_norm": 14.903225898742676, "learning_rate": 9.614827586206898e-07, "loss": 0.1035, "step": 244350 }, { "epoch": 2.4, "grad_norm": 4.798495292663574, "learning_rate": 9.61051724137931e-07, "loss": 0.1864, "step": 244375 }, { "epoch": 2.4, "grad_norm": 13.622143745422363, "learning_rate": 9.606206896551725e-07, "loss": 0.0821, "step": 244400 }, { "epoch": 2.4, "grad_norm": 3.237030029296875, "learning_rate": 9.601896551724138e-07, "loss": 0.1759, "step": 244425 }, { "epoch": 2.4, "grad_norm": 10.021717071533203, "learning_rate": 9.597586206896552e-07, "loss": 0.1129, "step": 244450 }, { "epoch": 2.4, "grad_norm": 8.154143333435059, "learning_rate": 9.593275862068967e-07, "loss": 0.14, "step": 244475 }, { "epoch": 2.4, "grad_norm": 14.187515258789062, "learning_rate": 9.588965517241381e-07, "loss": 0.0937, "step": 244500 }, { "epoch": 2.4, "grad_norm": 7.216701507568359, "learning_rate": 9.584655172413794e-07, "loss": 0.1859, "step": 244525 }, { "epoch": 2.4, "grad_norm": 13.100284576416016, "learning_rate": 9.580344827586208e-07, "loss": 0.1167, "step": 244550 }, { "epoch": 2.4, "grad_norm": 7.128902912139893, "learning_rate": 9.57603448275862e-07, "loss": 0.2088, "step": 244575 }, { "epoch": 2.4, "grad_norm": 8.249396324157715, "learning_rate": 9.571724137931035e-07, "loss": 0.0921, "step": 244600 }, { "epoch": 2.41, "grad_norm": 4.419551372528076, "learning_rate": 9.567413793103448e-07, "loss": 0.1825, "step": 244625 }, { "epoch": 2.41, "grad_norm": 16.98099708557129, "learning_rate": 9.563103448275862e-07, "loss": 0.1053, "step": 244650 }, { "epoch": 2.41, "grad_norm": 3.1498799324035645, "learning_rate": 9.558793103448277e-07, "loss": 0.1955, "step": 244675 }, { "epoch": 2.41, "grad_norm": 9.275308609008789, "learning_rate": 9.554482758620691e-07, "loss": 0.0848, "step": 244700 }, { "epoch": 2.41, "grad_norm": 1.9766892194747925, "learning_rate": 9.550172413793104e-07, "loss": 0.1577, "step": 244725 }, { "epoch": 2.41, "grad_norm": 11.75770378112793, "learning_rate": 9.545862068965518e-07, "loss": 0.1291, "step": 244750 }, { "epoch": 2.41, "grad_norm": 4.591390132904053, "learning_rate": 9.54155172413793e-07, "loss": 0.1961, "step": 244775 }, { "epoch": 2.41, "grad_norm": 14.12462043762207, "learning_rate": 9.537241379310345e-07, "loss": 0.1221, "step": 244800 }, { "epoch": 2.41, "grad_norm": 4.916555881500244, "learning_rate": 9.532931034482759e-07, "loss": 0.1415, "step": 244825 }, { "epoch": 2.41, "grad_norm": 12.274798393249512, "learning_rate": 9.528620689655172e-07, "loss": 0.1358, "step": 244850 }, { "epoch": 2.41, "grad_norm": 4.692558288574219, "learning_rate": 9.524310344827586e-07, "loss": 0.1966, "step": 244875 }, { "epoch": 2.41, "grad_norm": 4.678253173828125, "learning_rate": 9.520000000000002e-07, "loss": 0.0797, "step": 244900 }, { "epoch": 2.41, "grad_norm": 3.191603422164917, "learning_rate": 9.515689655172415e-07, "loss": 0.1618, "step": 244925 }, { "epoch": 2.41, "grad_norm": 6.282094955444336, "learning_rate": 9.511379310344829e-07, "loss": 0.0879, "step": 244950 }, { "epoch": 2.41, "grad_norm": 3.747485637664795, "learning_rate": 9.507068965517242e-07, "loss": 0.1625, "step": 244975 }, { "epoch": 2.41, "grad_norm": 12.784196853637695, "learning_rate": 9.502758620689656e-07, "loss": 0.1, "step": 245000 }, { "epoch": 2.41, "grad_norm": 4.5305070877075195, "learning_rate": 9.498620689655173e-07, "loss": 0.1443, "step": 245025 }, { "epoch": 2.41, "grad_norm": 14.360336303710938, "learning_rate": 9.494310344827587e-07, "loss": 0.1035, "step": 245050 }, { "epoch": 2.41, "grad_norm": 0.6802676916122437, "learning_rate": 9.49e-07, "loss": 0.1651, "step": 245075 }, { "epoch": 2.41, "grad_norm": 18.40972900390625, "learning_rate": 9.485689655172414e-07, "loss": 0.1046, "step": 245100 }, { "epoch": 2.41, "grad_norm": 15.237740516662598, "learning_rate": 9.481379310344827e-07, "loss": 0.1921, "step": 245125 }, { "epoch": 2.41, "grad_norm": 14.640397071838379, "learning_rate": 9.477068965517242e-07, "loss": 0.106, "step": 245150 }, { "epoch": 2.41, "grad_norm": 3.4069018363952637, "learning_rate": 9.472758620689657e-07, "loss": 0.1987, "step": 245175 }, { "epoch": 2.41, "grad_norm": 10.199816703796387, "learning_rate": 9.46844827586207e-07, "loss": 0.1013, "step": 245200 }, { "epoch": 2.41, "grad_norm": 0.015218867920339108, "learning_rate": 9.464137931034484e-07, "loss": 0.1805, "step": 245225 }, { "epoch": 2.41, "grad_norm": 8.089468955993652, "learning_rate": 9.459827586206897e-07, "loss": 0.1009, "step": 245250 }, { "epoch": 2.41, "grad_norm": 2.9608962535858154, "learning_rate": 9.455517241379312e-07, "loss": 0.1677, "step": 245275 }, { "epoch": 2.41, "grad_norm": 19.920011520385742, "learning_rate": 9.451206896551725e-07, "loss": 0.0923, "step": 245300 }, { "epoch": 2.41, "grad_norm": 4.34401273727417, "learning_rate": 9.446896551724139e-07, "loss": 0.1476, "step": 245325 }, { "epoch": 2.41, "grad_norm": 10.319293975830078, "learning_rate": 9.442586206896552e-07, "loss": 0.0825, "step": 245350 }, { "epoch": 2.41, "grad_norm": 0.6322543621063232, "learning_rate": 9.438275862068966e-07, "loss": 0.1654, "step": 245375 }, { "epoch": 2.41, "grad_norm": 11.607381820678711, "learning_rate": 9.433965517241381e-07, "loss": 0.1011, "step": 245400 }, { "epoch": 2.41, "grad_norm": 7.356059551239014, "learning_rate": 9.429655172413795e-07, "loss": 0.2137, "step": 245425 }, { "epoch": 2.41, "grad_norm": 8.554058074951172, "learning_rate": 9.425344827586208e-07, "loss": 0.0903, "step": 245450 }, { "epoch": 2.41, "grad_norm": 1.9388866424560547, "learning_rate": 9.421034482758622e-07, "loss": 0.1885, "step": 245475 }, { "epoch": 2.41, "grad_norm": 10.04891300201416, "learning_rate": 9.416724137931035e-07, "loss": 0.1293, "step": 245500 }, { "epoch": 2.41, "grad_norm": 3.7027666568756104, "learning_rate": 9.412413793103449e-07, "loss": 0.1676, "step": 245525 }, { "epoch": 2.41, "grad_norm": 8.347039222717285, "learning_rate": 9.408103448275862e-07, "loss": 0.0969, "step": 245550 }, { "epoch": 2.41, "grad_norm": 4.7533392906188965, "learning_rate": 9.403793103448276e-07, "loss": 0.1878, "step": 245575 }, { "epoch": 2.41, "grad_norm": 14.292815208435059, "learning_rate": 9.39948275862069e-07, "loss": 0.1152, "step": 245600 }, { "epoch": 2.42, "grad_norm": 11.383060455322266, "learning_rate": 9.395172413793104e-07, "loss": 0.1516, "step": 245625 }, { "epoch": 2.42, "grad_norm": 9.34765911102295, "learning_rate": 9.390862068965518e-07, "loss": 0.0862, "step": 245650 }, { "epoch": 2.42, "grad_norm": 6.005174160003662, "learning_rate": 9.386551724137932e-07, "loss": 0.1988, "step": 245675 }, { "epoch": 2.42, "grad_norm": 7.806740760803223, "learning_rate": 9.382241379310345e-07, "loss": 0.0854, "step": 245700 }, { "epoch": 2.42, "grad_norm": 1.8624751567840576, "learning_rate": 9.37793103448276e-07, "loss": 0.1696, "step": 245725 }, { "epoch": 2.42, "grad_norm": 8.715559959411621, "learning_rate": 9.373620689655174e-07, "loss": 0.1143, "step": 245750 }, { "epoch": 2.42, "grad_norm": 4.205491542816162, "learning_rate": 9.369310344827587e-07, "loss": 0.1977, "step": 245775 }, { "epoch": 2.42, "grad_norm": 11.190408706665039, "learning_rate": 9.365000000000001e-07, "loss": 0.0824, "step": 245800 }, { "epoch": 2.42, "grad_norm": 1.8930994272232056, "learning_rate": 9.360689655172414e-07, "loss": 0.1547, "step": 245825 }, { "epoch": 2.42, "grad_norm": 11.771056175231934, "learning_rate": 9.356379310344828e-07, "loss": 0.0922, "step": 245850 }, { "epoch": 2.42, "grad_norm": 0.31205058097839355, "learning_rate": 9.352068965517243e-07, "loss": 0.1948, "step": 245875 }, { "epoch": 2.42, "grad_norm": 11.022809028625488, "learning_rate": 9.347758620689657e-07, "loss": 0.0725, "step": 245900 }, { "epoch": 2.42, "grad_norm": 5.608887195587158, "learning_rate": 9.34344827586207e-07, "loss": 0.1761, "step": 245925 }, { "epoch": 2.42, "grad_norm": 9.240498542785645, "learning_rate": 9.339137931034484e-07, "loss": 0.074, "step": 245950 }, { "epoch": 2.42, "grad_norm": 3.561286449432373, "learning_rate": 9.334827586206897e-07, "loss": 0.2215, "step": 245975 }, { "epoch": 2.42, "grad_norm": 5.44218635559082, "learning_rate": 9.330517241379311e-07, "loss": 0.0962, "step": 246000 }, { "epoch": 2.42, "grad_norm": 5.495818614959717, "learning_rate": 9.326206896551724e-07, "loss": 0.1937, "step": 246025 }, { "epoch": 2.42, "grad_norm": 14.462685585021973, "learning_rate": 9.321896551724139e-07, "loss": 0.1061, "step": 246050 }, { "epoch": 2.42, "grad_norm": 0.9483263492584229, "learning_rate": 9.317586206896552e-07, "loss": 0.1934, "step": 246075 }, { "epoch": 2.42, "grad_norm": 12.322867393493652, "learning_rate": 9.313275862068966e-07, "loss": 0.0908, "step": 246100 }, { "epoch": 2.42, "grad_norm": 3.894465923309326, "learning_rate": 9.30896551724138e-07, "loss": 0.1797, "step": 246125 }, { "epoch": 2.42, "grad_norm": 4.367130279541016, "learning_rate": 9.304655172413794e-07, "loss": 0.1064, "step": 246150 }, { "epoch": 2.42, "grad_norm": 2.5205118656158447, "learning_rate": 9.300344827586208e-07, "loss": 0.1976, "step": 246175 }, { "epoch": 2.42, "grad_norm": 9.746696472167969, "learning_rate": 9.296034482758622e-07, "loss": 0.1128, "step": 246200 }, { "epoch": 2.42, "grad_norm": 0.564011812210083, "learning_rate": 9.291724137931035e-07, "loss": 0.1661, "step": 246225 }, { "epoch": 2.42, "grad_norm": 23.079402923583984, "learning_rate": 9.287413793103449e-07, "loss": 0.1271, "step": 246250 }, { "epoch": 2.42, "grad_norm": 1.9650460481643677, "learning_rate": 9.283103448275862e-07, "loss": 0.1545, "step": 246275 }, { "epoch": 2.42, "grad_norm": 2.8324267864227295, "learning_rate": 9.278793103448276e-07, "loss": 0.1051, "step": 246300 }, { "epoch": 2.42, "grad_norm": 8.311217308044434, "learning_rate": 9.274482758620689e-07, "loss": 0.1539, "step": 246325 }, { "epoch": 2.42, "grad_norm": 9.669193267822266, "learning_rate": 9.270172413793105e-07, "loss": 0.1035, "step": 246350 }, { "epoch": 2.42, "grad_norm": 3.886690139770508, "learning_rate": 9.265862068965519e-07, "loss": 0.2097, "step": 246375 }, { "epoch": 2.42, "grad_norm": 12.807404518127441, "learning_rate": 9.261551724137932e-07, "loss": 0.0812, "step": 246400 }, { "epoch": 2.42, "grad_norm": 6.346454620361328, "learning_rate": 9.257241379310346e-07, "loss": 0.163, "step": 246425 }, { "epoch": 2.42, "grad_norm": 14.911822319030762, "learning_rate": 9.252931034482759e-07, "loss": 0.0852, "step": 246450 }, { "epoch": 2.42, "grad_norm": 2.9501166343688965, "learning_rate": 9.248620689655173e-07, "loss": 0.2135, "step": 246475 }, { "epoch": 2.42, "grad_norm": 13.123298645019531, "learning_rate": 9.244310344827587e-07, "loss": 0.1229, "step": 246500 }, { "epoch": 2.42, "grad_norm": 4.247841835021973, "learning_rate": 9.240000000000001e-07, "loss": 0.166, "step": 246525 }, { "epoch": 2.42, "grad_norm": 13.735292434692383, "learning_rate": 9.235689655172414e-07, "loss": 0.1019, "step": 246550 }, { "epoch": 2.42, "grad_norm": 3.4943230152130127, "learning_rate": 9.231379310344828e-07, "loss": 0.1906, "step": 246575 }, { "epoch": 2.42, "grad_norm": 8.125344276428223, "learning_rate": 9.227068965517242e-07, "loss": 0.0917, "step": 246600 }, { "epoch": 2.42, "grad_norm": 2.6262753009796143, "learning_rate": 9.222758620689657e-07, "loss": 0.1862, "step": 246625 }, { "epoch": 2.43, "grad_norm": 7.702315330505371, "learning_rate": 9.21844827586207e-07, "loss": 0.0781, "step": 246650 }, { "epoch": 2.43, "grad_norm": 2.67769193649292, "learning_rate": 9.214137931034484e-07, "loss": 0.1747, "step": 246675 }, { "epoch": 2.43, "grad_norm": 6.700933456420898, "learning_rate": 9.209827586206897e-07, "loss": 0.0783, "step": 246700 }, { "epoch": 2.43, "grad_norm": 2.5311641693115234, "learning_rate": 9.205517241379311e-07, "loss": 0.1824, "step": 246725 }, { "epoch": 2.43, "grad_norm": 15.554533004760742, "learning_rate": 9.201206896551724e-07, "loss": 0.0879, "step": 246750 }, { "epoch": 2.43, "grad_norm": 4.689017295837402, "learning_rate": 9.196896551724138e-07, "loss": 0.1833, "step": 246775 }, { "epoch": 2.43, "grad_norm": 11.943530082702637, "learning_rate": 9.192586206896551e-07, "loss": 0.1029, "step": 246800 }, { "epoch": 2.43, "grad_norm": 1.6151829957962036, "learning_rate": 9.188275862068967e-07, "loss": 0.1905, "step": 246825 }, { "epoch": 2.43, "grad_norm": 9.897953987121582, "learning_rate": 9.18396551724138e-07, "loss": 0.0868, "step": 246850 }, { "epoch": 2.43, "grad_norm": 6.277917861938477, "learning_rate": 9.179655172413794e-07, "loss": 0.1872, "step": 246875 }, { "epoch": 2.43, "grad_norm": 5.369904041290283, "learning_rate": 9.175344827586208e-07, "loss": 0.1178, "step": 246900 }, { "epoch": 2.43, "grad_norm": 3.9161221981048584, "learning_rate": 9.171034482758621e-07, "loss": 0.1546, "step": 246925 }, { "epoch": 2.43, "grad_norm": 15.13097095489502, "learning_rate": 9.166724137931036e-07, "loss": 0.092, "step": 246950 }, { "epoch": 2.43, "grad_norm": 8.345844268798828, "learning_rate": 9.162413793103449e-07, "loss": 0.1677, "step": 246975 }, { "epoch": 2.43, "grad_norm": 4.0640974044799805, "learning_rate": 9.158103448275863e-07, "loss": 0.0869, "step": 247000 }, { "epoch": 2.43, "grad_norm": 3.095364570617676, "learning_rate": 9.153793103448276e-07, "loss": 0.1754, "step": 247025 }, { "epoch": 2.43, "grad_norm": 10.738044738769531, "learning_rate": 9.149482758620691e-07, "loss": 0.1129, "step": 247050 }, { "epoch": 2.43, "grad_norm": 2.4569623470306396, "learning_rate": 9.145172413793105e-07, "loss": 0.1461, "step": 247075 }, { "epoch": 2.43, "grad_norm": 3.361924648284912, "learning_rate": 9.140862068965519e-07, "loss": 0.0886, "step": 247100 }, { "epoch": 2.43, "grad_norm": 5.532537460327148, "learning_rate": 9.136551724137932e-07, "loss": 0.1951, "step": 247125 }, { "epoch": 2.43, "grad_norm": 12.903072357177734, "learning_rate": 9.132241379310346e-07, "loss": 0.0616, "step": 247150 }, { "epoch": 2.43, "grad_norm": 5.228731632232666, "learning_rate": 9.128103448275862e-07, "loss": 0.172, "step": 247175 }, { "epoch": 2.43, "grad_norm": 9.057682991027832, "learning_rate": 9.123793103448277e-07, "loss": 0.1035, "step": 247200 }, { "epoch": 2.43, "grad_norm": 1.1610240936279297, "learning_rate": 9.11948275862069e-07, "loss": 0.1531, "step": 247225 }, { "epoch": 2.43, "grad_norm": 11.710016250610352, "learning_rate": 9.115172413793104e-07, "loss": 0.0906, "step": 247250 }, { "epoch": 2.43, "grad_norm": 5.061301231384277, "learning_rate": 9.110862068965518e-07, "loss": 0.2081, "step": 247275 }, { "epoch": 2.43, "grad_norm": 3.6044819355010986, "learning_rate": 9.106551724137931e-07, "loss": 0.0968, "step": 247300 }, { "epoch": 2.43, "grad_norm": 2.207904577255249, "learning_rate": 9.102241379310347e-07, "loss": 0.1855, "step": 247325 }, { "epoch": 2.43, "grad_norm": 17.01077651977539, "learning_rate": 9.09793103448276e-07, "loss": 0.1023, "step": 247350 }, { "epoch": 2.43, "grad_norm": 4.158962249755859, "learning_rate": 9.093620689655174e-07, "loss": 0.1609, "step": 247375 }, { "epoch": 2.43, "grad_norm": 13.300338745117188, "learning_rate": 9.089310344827587e-07, "loss": 0.0862, "step": 247400 }, { "epoch": 2.43, "grad_norm": 5.432367324829102, "learning_rate": 9.085000000000001e-07, "loss": 0.1884, "step": 247425 }, { "epoch": 2.43, "grad_norm": 8.190391540527344, "learning_rate": 9.080689655172414e-07, "loss": 0.0922, "step": 247450 }, { "epoch": 2.43, "grad_norm": 3.683760404586792, "learning_rate": 9.076379310344828e-07, "loss": 0.166, "step": 247475 }, { "epoch": 2.43, "grad_norm": 6.9686689376831055, "learning_rate": 9.072068965517241e-07, "loss": 0.0797, "step": 247500 }, { "epoch": 2.43, "grad_norm": 0.35266029834747314, "learning_rate": 9.067758620689656e-07, "loss": 0.1535, "step": 247525 }, { "epoch": 2.43, "grad_norm": 11.622781753540039, "learning_rate": 9.06344827586207e-07, "loss": 0.1046, "step": 247550 }, { "epoch": 2.43, "grad_norm": 4.689002513885498, "learning_rate": 9.059137931034484e-07, "loss": 0.2036, "step": 247575 }, { "epoch": 2.43, "grad_norm": 6.40327262878418, "learning_rate": 9.054827586206897e-07, "loss": 0.1088, "step": 247600 }, { "epoch": 2.43, "grad_norm": 5.279117107391357, "learning_rate": 9.050517241379311e-07, "loss": 0.1811, "step": 247625 }, { "epoch": 2.43, "grad_norm": 9.278170585632324, "learning_rate": 9.046206896551725e-07, "loss": 0.0762, "step": 247650 }, { "epoch": 2.44, "grad_norm": 6.252527713775635, "learning_rate": 9.041896551724139e-07, "loss": 0.173, "step": 247675 }, { "epoch": 2.44, "grad_norm": 9.664963722229004, "learning_rate": 9.037586206896552e-07, "loss": 0.0754, "step": 247700 }, { "epoch": 2.44, "grad_norm": 4.868368625640869, "learning_rate": 9.033275862068966e-07, "loss": 0.1463, "step": 247725 }, { "epoch": 2.44, "grad_norm": 8.410982131958008, "learning_rate": 9.028965517241379e-07, "loss": 0.0704, "step": 247750 }, { "epoch": 2.44, "grad_norm": 5.581441402435303, "learning_rate": 9.024655172413793e-07, "loss": 0.1993, "step": 247775 }, { "epoch": 2.44, "grad_norm": 10.046177864074707, "learning_rate": 9.020344827586209e-07, "loss": 0.1007, "step": 247800 }, { "epoch": 2.44, "grad_norm": 1.9179370403289795, "learning_rate": 9.016034482758622e-07, "loss": 0.1978, "step": 247825 }, { "epoch": 2.44, "grad_norm": 5.424463748931885, "learning_rate": 9.011724137931036e-07, "loss": 0.1176, "step": 247850 }, { "epoch": 2.44, "grad_norm": 2.2011613845825195, "learning_rate": 9.007413793103449e-07, "loss": 0.2058, "step": 247875 }, { "epoch": 2.44, "grad_norm": 16.060251235961914, "learning_rate": 9.003103448275863e-07, "loss": 0.1167, "step": 247900 }, { "epoch": 2.44, "grad_norm": 5.4549713134765625, "learning_rate": 8.998793103448276e-07, "loss": 0.1669, "step": 247925 }, { "epoch": 2.44, "grad_norm": 8.966668128967285, "learning_rate": 8.99448275862069e-07, "loss": 0.0719, "step": 247950 }, { "epoch": 2.44, "grad_norm": 3.8475277423858643, "learning_rate": 8.990172413793104e-07, "loss": 0.1564, "step": 247975 }, { "epoch": 2.44, "grad_norm": 17.2984619140625, "learning_rate": 8.985862068965518e-07, "loss": 0.0987, "step": 248000 }, { "epoch": 2.44, "grad_norm": 0.9897249341011047, "learning_rate": 8.981551724137932e-07, "loss": 0.125, "step": 248025 }, { "epoch": 2.44, "grad_norm": 14.285856246948242, "learning_rate": 8.977241379310346e-07, "loss": 0.0983, "step": 248050 }, { "epoch": 2.44, "grad_norm": 4.396213054656982, "learning_rate": 8.972931034482759e-07, "loss": 0.1606, "step": 248075 }, { "epoch": 2.44, "grad_norm": 12.588603973388672, "learning_rate": 8.968620689655174e-07, "loss": 0.1075, "step": 248100 }, { "epoch": 2.44, "grad_norm": 1.3245689868927002, "learning_rate": 8.964310344827587e-07, "loss": 0.1491, "step": 248125 }, { "epoch": 2.44, "grad_norm": 11.36240005493164, "learning_rate": 8.960000000000001e-07, "loss": 0.0925, "step": 248150 }, { "epoch": 2.44, "grad_norm": 4.814053535461426, "learning_rate": 8.955689655172414e-07, "loss": 0.1746, "step": 248175 }, { "epoch": 2.44, "grad_norm": 13.150801658630371, "learning_rate": 8.951379310344828e-07, "loss": 0.1052, "step": 248200 }, { "epoch": 2.44, "grad_norm": 3.8187096118927, "learning_rate": 8.947068965517241e-07, "loss": 0.2103, "step": 248225 }, { "epoch": 2.44, "grad_norm": 9.228424072265625, "learning_rate": 8.942758620689655e-07, "loss": 0.111, "step": 248250 }, { "epoch": 2.44, "grad_norm": 4.747114181518555, "learning_rate": 8.93844827586207e-07, "loss": 0.1732, "step": 248275 }, { "epoch": 2.44, "grad_norm": 7.462428569793701, "learning_rate": 8.934137931034484e-07, "loss": 0.0642, "step": 248300 }, { "epoch": 2.44, "grad_norm": 3.468531608581543, "learning_rate": 8.929827586206897e-07, "loss": 0.1897, "step": 248325 }, { "epoch": 2.44, "grad_norm": 13.095329284667969, "learning_rate": 8.925517241379311e-07, "loss": 0.101, "step": 248350 }, { "epoch": 2.44, "grad_norm": 4.127970218658447, "learning_rate": 8.921206896551724e-07, "loss": 0.1653, "step": 248375 }, { "epoch": 2.44, "grad_norm": 9.824075698852539, "learning_rate": 8.916896551724138e-07, "loss": 0.121, "step": 248400 }, { "epoch": 2.44, "grad_norm": 3.429042100906372, "learning_rate": 8.912586206896553e-07, "loss": 0.1706, "step": 248425 }, { "epoch": 2.44, "grad_norm": 6.326976299285889, "learning_rate": 8.908275862068966e-07, "loss": 0.0868, "step": 248450 }, { "epoch": 2.44, "grad_norm": 3.6136515140533447, "learning_rate": 8.90396551724138e-07, "loss": 0.171, "step": 248475 }, { "epoch": 2.44, "grad_norm": 11.67345905303955, "learning_rate": 8.899655172413794e-07, "loss": 0.1049, "step": 248500 }, { "epoch": 2.44, "grad_norm": 6.353085517883301, "learning_rate": 8.895344827586208e-07, "loss": 0.194, "step": 248525 }, { "epoch": 2.44, "grad_norm": 8.879548072814941, "learning_rate": 8.891034482758622e-07, "loss": 0.0694, "step": 248550 }, { "epoch": 2.44, "grad_norm": 4.354803085327148, "learning_rate": 8.886724137931036e-07, "loss": 0.1484, "step": 248575 }, { "epoch": 2.44, "grad_norm": 8.937094688415527, "learning_rate": 8.882413793103449e-07, "loss": 0.0951, "step": 248600 }, { "epoch": 2.44, "grad_norm": 4.252033233642578, "learning_rate": 8.878103448275863e-07, "loss": 0.142, "step": 248625 }, { "epoch": 2.44, "grad_norm": 6.319611549377441, "learning_rate": 8.873793103448276e-07, "loss": 0.1086, "step": 248650 }, { "epoch": 2.45, "grad_norm": 2.2006375789642334, "learning_rate": 8.86948275862069e-07, "loss": 0.1504, "step": 248675 }, { "epoch": 2.45, "grad_norm": 11.448331832885742, "learning_rate": 8.865172413793103e-07, "loss": 0.089, "step": 248700 }, { "epoch": 2.45, "grad_norm": 6.252857208251953, "learning_rate": 8.860862068965519e-07, "loss": 0.1779, "step": 248725 }, { "epoch": 2.45, "grad_norm": 5.869881629943848, "learning_rate": 8.856551724137932e-07, "loss": 0.1113, "step": 248750 }, { "epoch": 2.45, "grad_norm": 4.100024700164795, "learning_rate": 8.852241379310346e-07, "loss": 0.2217, "step": 248775 }, { "epoch": 2.45, "grad_norm": 5.29377555847168, "learning_rate": 8.847931034482759e-07, "loss": 0.0928, "step": 248800 }, { "epoch": 2.45, "grad_norm": 2.3653972148895264, "learning_rate": 8.843620689655173e-07, "loss": 0.162, "step": 248825 }, { "epoch": 2.45, "grad_norm": 10.402261734008789, "learning_rate": 8.839310344827586e-07, "loss": 0.0808, "step": 248850 }, { "epoch": 2.45, "grad_norm": 3.998807668685913, "learning_rate": 8.835000000000001e-07, "loss": 0.1943, "step": 248875 }, { "epoch": 2.45, "grad_norm": 17.911529541015625, "learning_rate": 8.830689655172414e-07, "loss": 0.0879, "step": 248900 }, { "epoch": 2.45, "grad_norm": 1.0382392406463623, "learning_rate": 8.826379310344828e-07, "loss": 0.1839, "step": 248925 }, { "epoch": 2.45, "grad_norm": 5.9684576988220215, "learning_rate": 8.822068965517241e-07, "loss": 0.1185, "step": 248950 }, { "epoch": 2.45, "grad_norm": 4.363386154174805, "learning_rate": 8.817758620689656e-07, "loss": 0.1975, "step": 248975 }, { "epoch": 2.45, "grad_norm": 9.994220733642578, "learning_rate": 8.813448275862071e-07, "loss": 0.1034, "step": 249000 }, { "epoch": 2.45, "grad_norm": 5.410933017730713, "learning_rate": 8.809137931034484e-07, "loss": 0.1692, "step": 249025 }, { "epoch": 2.45, "grad_norm": 15.807644844055176, "learning_rate": 8.804827586206898e-07, "loss": 0.1181, "step": 249050 }, { "epoch": 2.45, "grad_norm": 2.741647243499756, "learning_rate": 8.800517241379311e-07, "loss": 0.171, "step": 249075 }, { "epoch": 2.45, "grad_norm": 16.123023986816406, "learning_rate": 8.796206896551725e-07, "loss": 0.0896, "step": 249100 }, { "epoch": 2.45, "grad_norm": 2.877520799636841, "learning_rate": 8.791896551724138e-07, "loss": 0.1804, "step": 249125 }, { "epoch": 2.45, "grad_norm": 16.5672664642334, "learning_rate": 8.787586206896552e-07, "loss": 0.0791, "step": 249150 }, { "epoch": 2.45, "grad_norm": 3.401031017303467, "learning_rate": 8.783275862068965e-07, "loss": 0.2135, "step": 249175 }, { "epoch": 2.45, "grad_norm": 12.38521671295166, "learning_rate": 8.778965517241381e-07, "loss": 0.1031, "step": 249200 }, { "epoch": 2.45, "grad_norm": 1.95052170753479, "learning_rate": 8.774655172413794e-07, "loss": 0.1429, "step": 249225 }, { "epoch": 2.45, "grad_norm": 6.5761189460754395, "learning_rate": 8.770344827586208e-07, "loss": 0.0884, "step": 249250 }, { "epoch": 2.45, "grad_norm": 2.7806782722473145, "learning_rate": 8.766206896551726e-07, "loss": 0.2153, "step": 249275 }, { "epoch": 2.45, "grad_norm": 10.220720291137695, "learning_rate": 8.761896551724139e-07, "loss": 0.0925, "step": 249300 }, { "epoch": 2.45, "grad_norm": 6.6573100090026855, "learning_rate": 8.757586206896553e-07, "loss": 0.1513, "step": 249325 }, { "epoch": 2.45, "grad_norm": 10.535972595214844, "learning_rate": 8.753275862068966e-07, "loss": 0.085, "step": 249350 }, { "epoch": 2.45, "grad_norm": 0.7715555429458618, "learning_rate": 8.74896551724138e-07, "loss": 0.1434, "step": 249375 }, { "epoch": 2.45, "grad_norm": 9.31518840789795, "learning_rate": 8.744655172413793e-07, "loss": 0.1202, "step": 249400 }, { "epoch": 2.45, "grad_norm": 9.07082462310791, "learning_rate": 8.740344827586207e-07, "loss": 0.2034, "step": 249425 }, { "epoch": 2.45, "grad_norm": 10.744771957397461, "learning_rate": 8.736034482758621e-07, "loss": 0.1054, "step": 249450 }, { "epoch": 2.45, "grad_norm": 6.9011616706848145, "learning_rate": 8.731724137931036e-07, "loss": 0.1807, "step": 249475 }, { "epoch": 2.45, "grad_norm": 12.99326229095459, "learning_rate": 8.727413793103449e-07, "loss": 0.0983, "step": 249500 }, { "epoch": 2.45, "grad_norm": 4.53037691116333, "learning_rate": 8.723103448275863e-07, "loss": 0.1778, "step": 249525 }, { "epoch": 2.45, "grad_norm": 9.767085075378418, "learning_rate": 8.718793103448276e-07, "loss": 0.093, "step": 249550 }, { "epoch": 2.45, "grad_norm": 3.6223256587982178, "learning_rate": 8.714482758620691e-07, "loss": 0.1925, "step": 249575 }, { "epoch": 2.45, "grad_norm": 5.779414176940918, "learning_rate": 8.710172413793104e-07, "loss": 0.0788, "step": 249600 }, { "epoch": 2.45, "grad_norm": 2.8131487369537354, "learning_rate": 8.705862068965518e-07, "loss": 0.1737, "step": 249625 }, { "epoch": 2.45, "grad_norm": 11.603975296020508, "learning_rate": 8.701551724137931e-07, "loss": 0.123, "step": 249650 }, { "epoch": 2.45, "grad_norm": 4.566542148590088, "learning_rate": 8.697241379310345e-07, "loss": 0.1743, "step": 249675 }, { "epoch": 2.46, "grad_norm": 2.4868509769439697, "learning_rate": 8.69293103448276e-07, "loss": 0.0852, "step": 249700 }, { "epoch": 2.46, "grad_norm": 5.574167728424072, "learning_rate": 8.688620689655174e-07, "loss": 0.2262, "step": 249725 }, { "epoch": 2.46, "grad_norm": 8.71968936920166, "learning_rate": 8.684310344827587e-07, "loss": 0.0819, "step": 249750 }, { "epoch": 2.46, "grad_norm": 5.805687427520752, "learning_rate": 8.680000000000001e-07, "loss": 0.191, "step": 249775 }, { "epoch": 2.46, "grad_norm": 7.183432102203369, "learning_rate": 8.675689655172414e-07, "loss": 0.0865, "step": 249800 }, { "epoch": 2.46, "grad_norm": 1.1895363330841064, "learning_rate": 8.671379310344828e-07, "loss": 0.1597, "step": 249825 }, { "epoch": 2.46, "grad_norm": 8.424238204956055, "learning_rate": 8.667068965517241e-07, "loss": 0.1426, "step": 249850 }, { "epoch": 2.46, "grad_norm": 3.475738525390625, "learning_rate": 8.662758620689655e-07, "loss": 0.1711, "step": 249875 }, { "epoch": 2.46, "grad_norm": 7.242837429046631, "learning_rate": 8.65844827586207e-07, "loss": 0.0934, "step": 249900 }, { "epoch": 2.46, "grad_norm": 4.2671895027160645, "learning_rate": 8.654137931034483e-07, "loss": 0.1718, "step": 249925 }, { "epoch": 2.46, "grad_norm": 10.74681282043457, "learning_rate": 8.649827586206898e-07, "loss": 0.1035, "step": 249950 }, { "epoch": 2.46, "grad_norm": 2.564894676208496, "learning_rate": 8.645517241379311e-07, "loss": 0.1634, "step": 249975 }, { "epoch": 2.46, "grad_norm": 11.369135856628418, "learning_rate": 8.641206896551725e-07, "loss": 0.1151, "step": 250000 }, { "epoch": 2.46, "grad_norm": 1.1904332637786865, "learning_rate": 8.636896551724139e-07, "loss": 0.1284, "step": 250025 }, { "epoch": 2.46, "grad_norm": 1.9190013408660889, "learning_rate": 8.632586206896553e-07, "loss": 0.0912, "step": 250050 }, { "epoch": 2.46, "grad_norm": 3.6812682151794434, "learning_rate": 8.628275862068966e-07, "loss": 0.2132, "step": 250075 }, { "epoch": 2.46, "grad_norm": 12.408217430114746, "learning_rate": 8.62396551724138e-07, "loss": 0.101, "step": 250100 }, { "epoch": 2.46, "grad_norm": 5.0671586990356445, "learning_rate": 8.619655172413793e-07, "loss": 0.165, "step": 250125 }, { "epoch": 2.46, "grad_norm": 5.277192115783691, "learning_rate": 8.615344827586207e-07, "loss": 0.0906, "step": 250150 }, { "epoch": 2.46, "grad_norm": 5.4902191162109375, "learning_rate": 8.611034482758622e-07, "loss": 0.172, "step": 250175 }, { "epoch": 2.46, "grad_norm": 9.261336326599121, "learning_rate": 8.606724137931036e-07, "loss": 0.0864, "step": 250200 }, { "epoch": 2.46, "grad_norm": 4.580246448516846, "learning_rate": 8.602413793103449e-07, "loss": 0.1818, "step": 250225 }, { "epoch": 2.46, "grad_norm": 14.26180362701416, "learning_rate": 8.598103448275863e-07, "loss": 0.1434, "step": 250250 }, { "epoch": 2.46, "grad_norm": 3.4435598850250244, "learning_rate": 8.593793103448276e-07, "loss": 0.1785, "step": 250275 }, { "epoch": 2.46, "grad_norm": 15.90633773803711, "learning_rate": 8.58948275862069e-07, "loss": 0.1247, "step": 250300 }, { "epoch": 2.46, "grad_norm": 2.165766954421997, "learning_rate": 8.585172413793103e-07, "loss": 0.2117, "step": 250325 }, { "epoch": 2.46, "grad_norm": 10.133613586425781, "learning_rate": 8.580862068965518e-07, "loss": 0.0645, "step": 250350 }, { "epoch": 2.46, "grad_norm": 2.7317440509796143, "learning_rate": 8.576551724137931e-07, "loss": 0.1847, "step": 250375 }, { "epoch": 2.46, "grad_norm": 9.670419692993164, "learning_rate": 8.572241379310345e-07, "loss": 0.123, "step": 250400 }, { "epoch": 2.46, "grad_norm": 4.00061559677124, "learning_rate": 8.56793103448276e-07, "loss": 0.1825, "step": 250425 }, { "epoch": 2.46, "grad_norm": 11.253525733947754, "learning_rate": 8.563620689655173e-07, "loss": 0.1008, "step": 250450 }, { "epoch": 2.46, "grad_norm": 3.702413558959961, "learning_rate": 8.559310344827588e-07, "loss": 0.2134, "step": 250475 }, { "epoch": 2.46, "grad_norm": 11.510003089904785, "learning_rate": 8.555000000000001e-07, "loss": 0.0809, "step": 250500 }, { "epoch": 2.46, "grad_norm": 5.877137660980225, "learning_rate": 8.550689655172415e-07, "loss": 0.1794, "step": 250525 }, { "epoch": 2.46, "grad_norm": 7.840363502502441, "learning_rate": 8.546379310344828e-07, "loss": 0.1119, "step": 250550 }, { "epoch": 2.46, "grad_norm": 1.5474210977554321, "learning_rate": 8.542068965517242e-07, "loss": 0.1607, "step": 250575 }, { "epoch": 2.46, "grad_norm": 9.78604793548584, "learning_rate": 8.537758620689655e-07, "loss": 0.1078, "step": 250600 }, { "epoch": 2.46, "grad_norm": 8.281816482543945, "learning_rate": 8.533448275862069e-07, "loss": 0.1783, "step": 250625 }, { "epoch": 2.46, "grad_norm": 9.27470874786377, "learning_rate": 8.529137931034484e-07, "loss": 0.0956, "step": 250650 }, { "epoch": 2.46, "grad_norm": 1.5890207290649414, "learning_rate": 8.524827586206898e-07, "loss": 0.1874, "step": 250675 }, { "epoch": 2.46, "grad_norm": 12.631131172180176, "learning_rate": 8.520517241379311e-07, "loss": 0.0974, "step": 250700 }, { "epoch": 2.47, "grad_norm": 6.3650126457214355, "learning_rate": 8.516206896551725e-07, "loss": 0.1601, "step": 250725 }, { "epoch": 2.47, "grad_norm": 9.15627670288086, "learning_rate": 8.511896551724138e-07, "loss": 0.0933, "step": 250750 }, { "epoch": 2.47, "grad_norm": 3.83488130569458, "learning_rate": 8.507586206896552e-07, "loss": 0.1737, "step": 250775 }, { "epoch": 2.47, "grad_norm": 13.27779483795166, "learning_rate": 8.503275862068966e-07, "loss": 0.0928, "step": 250800 }, { "epoch": 2.47, "grad_norm": 3.78605580329895, "learning_rate": 8.49896551724138e-07, "loss": 0.1317, "step": 250825 }, { "epoch": 2.47, "grad_norm": 13.500720024108887, "learning_rate": 8.494655172413793e-07, "loss": 0.1187, "step": 250850 }, { "epoch": 2.47, "grad_norm": 4.937334060668945, "learning_rate": 8.490344827586208e-07, "loss": 0.1475, "step": 250875 }, { "epoch": 2.47, "grad_norm": 6.240870475769043, "learning_rate": 8.486034482758621e-07, "loss": 0.0776, "step": 250900 }, { "epoch": 2.47, "grad_norm": 4.053596496582031, "learning_rate": 8.481724137931036e-07, "loss": 0.1783, "step": 250925 }, { "epoch": 2.47, "grad_norm": 5.594881057739258, "learning_rate": 8.47741379310345e-07, "loss": 0.0718, "step": 250950 }, { "epoch": 2.47, "grad_norm": 5.834920883178711, "learning_rate": 8.473103448275863e-07, "loss": 0.1619, "step": 250975 }, { "epoch": 2.47, "grad_norm": 11.370758056640625, "learning_rate": 8.468793103448277e-07, "loss": 0.1, "step": 251000 }, { "epoch": 2.47, "grad_norm": 1.982802391052246, "learning_rate": 8.46448275862069e-07, "loss": 0.1822, "step": 251025 }, { "epoch": 2.47, "grad_norm": 11.198345184326172, "learning_rate": 8.460172413793104e-07, "loss": 0.0686, "step": 251050 }, { "epoch": 2.47, "grad_norm": 5.653324604034424, "learning_rate": 8.455862068965517e-07, "loss": 0.1573, "step": 251075 }, { "epoch": 2.47, "grad_norm": 8.581552505493164, "learning_rate": 8.451551724137932e-07, "loss": 0.0699, "step": 251100 }, { "epoch": 2.47, "grad_norm": 7.044893264770508, "learning_rate": 8.447241379310346e-07, "loss": 0.2092, "step": 251125 }, { "epoch": 2.47, "grad_norm": 6.225102424621582, "learning_rate": 8.44293103448276e-07, "loss": 0.107, "step": 251150 }, { "epoch": 2.47, "grad_norm": 2.78483510017395, "learning_rate": 8.438620689655173e-07, "loss": 0.1482, "step": 251175 }, { "epoch": 2.47, "grad_norm": 8.454780578613281, "learning_rate": 8.434310344827587e-07, "loss": 0.0811, "step": 251200 }, { "epoch": 2.47, "grad_norm": 5.9627766609191895, "learning_rate": 8.43e-07, "loss": 0.1993, "step": 251225 }, { "epoch": 2.47, "grad_norm": 14.353608131408691, "learning_rate": 8.425689655172415e-07, "loss": 0.1191, "step": 251250 }, { "epoch": 2.47, "grad_norm": 7.087486267089844, "learning_rate": 8.421379310344828e-07, "loss": 0.1721, "step": 251275 }, { "epoch": 2.47, "grad_norm": 10.807602882385254, "learning_rate": 8.417068965517242e-07, "loss": 0.1189, "step": 251300 }, { "epoch": 2.47, "grad_norm": 7.623104572296143, "learning_rate": 8.412758620689655e-07, "loss": 0.1579, "step": 251325 }, { "epoch": 2.47, "grad_norm": 4.577503204345703, "learning_rate": 8.40844827586207e-07, "loss": 0.0844, "step": 251350 }, { "epoch": 2.47, "grad_norm": 3.934138059616089, "learning_rate": 8.404137931034484e-07, "loss": 0.1579, "step": 251375 }, { "epoch": 2.47, "grad_norm": 11.421502113342285, "learning_rate": 8.399827586206898e-07, "loss": 0.1003, "step": 251400 }, { "epoch": 2.47, "grad_norm": 4.9241814613342285, "learning_rate": 8.395517241379311e-07, "loss": 0.1497, "step": 251425 }, { "epoch": 2.47, "grad_norm": 12.024718284606934, "learning_rate": 8.391206896551725e-07, "loss": 0.0851, "step": 251450 }, { "epoch": 2.47, "grad_norm": 6.0491623878479, "learning_rate": 8.386896551724138e-07, "loss": 0.1647, "step": 251475 }, { "epoch": 2.47, "grad_norm": 10.952655792236328, "learning_rate": 8.382586206896552e-07, "loss": 0.077, "step": 251500 }, { "epoch": 2.47, "grad_norm": 6.5849690437316895, "learning_rate": 8.378275862068965e-07, "loss": 0.1878, "step": 251525 }, { "epoch": 2.47, "grad_norm": 9.31619644165039, "learning_rate": 8.373965517241379e-07, "loss": 0.0749, "step": 251550 }, { "epoch": 2.47, "grad_norm": 6.435527801513672, "learning_rate": 8.369655172413793e-07, "loss": 0.2217, "step": 251575 }, { "epoch": 2.47, "grad_norm": 12.194232940673828, "learning_rate": 8.365344827586208e-07, "loss": 0.0974, "step": 251600 }, { "epoch": 2.47, "grad_norm": 4.080202579498291, "learning_rate": 8.361034482758622e-07, "loss": 0.1858, "step": 251625 }, { "epoch": 2.47, "grad_norm": 7.158318042755127, "learning_rate": 8.356724137931035e-07, "loss": 0.0911, "step": 251650 }, { "epoch": 2.47, "grad_norm": 4.261127471923828, "learning_rate": 8.352413793103449e-07, "loss": 0.1967, "step": 251675 }, { "epoch": 2.47, "grad_norm": 9.660201072692871, "learning_rate": 8.348103448275863e-07, "loss": 0.0873, "step": 251700 }, { "epoch": 2.48, "grad_norm": 6.1682209968566895, "learning_rate": 8.34396551724138e-07, "loss": 0.1789, "step": 251725 }, { "epoch": 2.48, "grad_norm": 16.899614334106445, "learning_rate": 8.339655172413793e-07, "loss": 0.0928, "step": 251750 }, { "epoch": 2.48, "grad_norm": 6.939659118652344, "learning_rate": 8.335344827586207e-07, "loss": 0.2076, "step": 251775 }, { "epoch": 2.48, "grad_norm": 12.12402629852295, "learning_rate": 8.331034482758621e-07, "loss": 0.0751, "step": 251800 }, { "epoch": 2.48, "grad_norm": 6.308591365814209, "learning_rate": 8.326724137931035e-07, "loss": 0.1916, "step": 251825 }, { "epoch": 2.48, "grad_norm": 5.665630340576172, "learning_rate": 8.32241379310345e-07, "loss": 0.0674, "step": 251850 }, { "epoch": 2.48, "grad_norm": 1.683381199836731, "learning_rate": 8.318103448275863e-07, "loss": 0.1974, "step": 251875 }, { "epoch": 2.48, "grad_norm": 6.986183166503906, "learning_rate": 8.313793103448277e-07, "loss": 0.0857, "step": 251900 }, { "epoch": 2.48, "grad_norm": 5.6157426834106445, "learning_rate": 8.30948275862069e-07, "loss": 0.1641, "step": 251925 }, { "epoch": 2.48, "grad_norm": 14.089495658874512, "learning_rate": 8.305172413793105e-07, "loss": 0.0974, "step": 251950 }, { "epoch": 2.48, "grad_norm": 0.13971596956253052, "learning_rate": 8.300862068965518e-07, "loss": 0.1776, "step": 251975 }, { "epoch": 2.48, "grad_norm": 9.660189628601074, "learning_rate": 8.296551724137932e-07, "loss": 0.0964, "step": 252000 }, { "epoch": 2.48, "grad_norm": 6.707589626312256, "learning_rate": 8.292241379310345e-07, "loss": 0.2036, "step": 252025 }, { "epoch": 2.48, "grad_norm": 8.696737289428711, "learning_rate": 8.287931034482759e-07, "loss": 0.0816, "step": 252050 }, { "epoch": 2.48, "grad_norm": 3.895548105239868, "learning_rate": 8.283620689655172e-07, "loss": 0.1747, "step": 252075 }, { "epoch": 2.48, "grad_norm": 17.72128677368164, "learning_rate": 8.279310344827588e-07, "loss": 0.1146, "step": 252100 }, { "epoch": 2.48, "grad_norm": 3.8370823860168457, "learning_rate": 8.275000000000001e-07, "loss": 0.2115, "step": 252125 }, { "epoch": 2.48, "grad_norm": 10.548165321350098, "learning_rate": 8.270689655172415e-07, "loss": 0.0919, "step": 252150 }, { "epoch": 2.48, "grad_norm": 4.846226215362549, "learning_rate": 8.266379310344828e-07, "loss": 0.1609, "step": 252175 }, { "epoch": 2.48, "grad_norm": 9.684663772583008, "learning_rate": 8.262068965517242e-07, "loss": 0.0942, "step": 252200 }, { "epoch": 2.48, "grad_norm": 3.411247491836548, "learning_rate": 8.257758620689655e-07, "loss": 0.1802, "step": 252225 }, { "epoch": 2.48, "grad_norm": 4.01871919631958, "learning_rate": 8.25344827586207e-07, "loss": 0.0894, "step": 252250 }, { "epoch": 2.48, "grad_norm": 0.800376832485199, "learning_rate": 8.249137931034483e-07, "loss": 0.1857, "step": 252275 }, { "epoch": 2.48, "grad_norm": 9.62108325958252, "learning_rate": 8.244827586206897e-07, "loss": 0.1017, "step": 252300 }, { "epoch": 2.48, "grad_norm": 1.4377338886260986, "learning_rate": 8.240517241379311e-07, "loss": 0.1603, "step": 252325 }, { "epoch": 2.48, "grad_norm": 10.814118385314941, "learning_rate": 8.236206896551725e-07, "loss": 0.0916, "step": 252350 }, { "epoch": 2.48, "grad_norm": 3.8467090129852295, "learning_rate": 8.231896551724138e-07, "loss": 0.203, "step": 252375 }, { "epoch": 2.48, "grad_norm": 9.078457832336426, "learning_rate": 8.227586206896553e-07, "loss": 0.0741, "step": 252400 }, { "epoch": 2.48, "grad_norm": 3.266397476196289, "learning_rate": 8.223275862068966e-07, "loss": 0.1837, "step": 252425 }, { "epoch": 2.48, "grad_norm": 9.105229377746582, "learning_rate": 8.21896551724138e-07, "loss": 0.1154, "step": 252450 }, { "epoch": 2.48, "grad_norm": 2.2030444145202637, "learning_rate": 8.214655172413793e-07, "loss": 0.1644, "step": 252475 }, { "epoch": 2.48, "grad_norm": 5.933074474334717, "learning_rate": 8.210344827586207e-07, "loss": 0.0801, "step": 252500 }, { "epoch": 2.48, "grad_norm": 3.2149994373321533, "learning_rate": 8.20603448275862e-07, "loss": 0.2133, "step": 252525 }, { "epoch": 2.48, "grad_norm": 8.369661331176758, "learning_rate": 8.201724137931036e-07, "loss": 0.0903, "step": 252550 }, { "epoch": 2.48, "grad_norm": 4.761501789093018, "learning_rate": 8.19741379310345e-07, "loss": 0.207, "step": 252575 }, { "epoch": 2.48, "grad_norm": 14.129965782165527, "learning_rate": 8.193103448275863e-07, "loss": 0.1036, "step": 252600 }, { "epoch": 2.48, "grad_norm": 3.4398193359375, "learning_rate": 8.188793103448277e-07, "loss": 0.1298, "step": 252625 }, { "epoch": 2.48, "grad_norm": 7.964207649230957, "learning_rate": 8.18448275862069e-07, "loss": 0.0981, "step": 252650 }, { "epoch": 2.48, "grad_norm": 2.8862907886505127, "learning_rate": 8.180172413793104e-07, "loss": 0.1739, "step": 252675 }, { "epoch": 2.48, "grad_norm": 9.80313777923584, "learning_rate": 8.175862068965518e-07, "loss": 0.1609, "step": 252700 }, { "epoch": 2.48, "grad_norm": 5.856678485870361, "learning_rate": 8.171551724137932e-07, "loss": 0.1783, "step": 252725 }, { "epoch": 2.49, "grad_norm": 12.655532836914062, "learning_rate": 8.167241379310345e-07, "loss": 0.109, "step": 252750 }, { "epoch": 2.49, "grad_norm": 6.668423175811768, "learning_rate": 8.162931034482759e-07, "loss": 0.1855, "step": 252775 }, { "epoch": 2.49, "grad_norm": 12.94113540649414, "learning_rate": 8.158620689655173e-07, "loss": 0.0914, "step": 252800 }, { "epoch": 2.49, "grad_norm": 3.405545234680176, "learning_rate": 8.154310344827587e-07, "loss": 0.1855, "step": 252825 }, { "epoch": 2.49, "grad_norm": 5.765199661254883, "learning_rate": 8.150000000000001e-07, "loss": 0.0742, "step": 252850 }, { "epoch": 2.49, "grad_norm": 1.2945133447647095, "learning_rate": 8.145689655172415e-07, "loss": 0.1986, "step": 252875 }, { "epoch": 2.49, "grad_norm": 7.678348064422607, "learning_rate": 8.141379310344828e-07, "loss": 0.0992, "step": 252900 }, { "epoch": 2.49, "grad_norm": 6.700366020202637, "learning_rate": 8.137068965517242e-07, "loss": 0.1873, "step": 252925 }, { "epoch": 2.49, "grad_norm": 11.00976848602295, "learning_rate": 8.132758620689655e-07, "loss": 0.0961, "step": 252950 }, { "epoch": 2.49, "grad_norm": 5.341731071472168, "learning_rate": 8.128448275862069e-07, "loss": 0.1447, "step": 252975 }, { "epoch": 2.49, "grad_norm": 8.923531532287598, "learning_rate": 8.124137931034482e-07, "loss": 0.1293, "step": 253000 }, { "epoch": 2.49, "grad_norm": 4.138914585113525, "learning_rate": 8.119827586206898e-07, "loss": 0.15, "step": 253025 }, { "epoch": 2.49, "grad_norm": 12.93727970123291, "learning_rate": 8.115517241379312e-07, "loss": 0.0896, "step": 253050 }, { "epoch": 2.49, "grad_norm": 2.156019926071167, "learning_rate": 8.111206896551725e-07, "loss": 0.189, "step": 253075 }, { "epoch": 2.49, "grad_norm": 10.314472198486328, "learning_rate": 8.106896551724139e-07, "loss": 0.0909, "step": 253100 }, { "epoch": 2.49, "grad_norm": 3.8379106521606445, "learning_rate": 8.102586206896552e-07, "loss": 0.1692, "step": 253125 }, { "epoch": 2.49, "grad_norm": 12.794468879699707, "learning_rate": 8.098275862068967e-07, "loss": 0.0973, "step": 253150 }, { "epoch": 2.49, "grad_norm": 4.406872749328613, "learning_rate": 8.09396551724138e-07, "loss": 0.2053, "step": 253175 }, { "epoch": 2.49, "grad_norm": 9.580181121826172, "learning_rate": 8.089655172413794e-07, "loss": 0.104, "step": 253200 }, { "epoch": 2.49, "grad_norm": 0.17197579145431519, "learning_rate": 8.085344827586207e-07, "loss": 0.1584, "step": 253225 }, { "epoch": 2.49, "grad_norm": 4.1226301193237305, "learning_rate": 8.081034482758621e-07, "loss": 0.0981, "step": 253250 }, { "epoch": 2.49, "grad_norm": 3.181471347808838, "learning_rate": 8.076724137931035e-07, "loss": 0.1481, "step": 253275 }, { "epoch": 2.49, "grad_norm": 13.436280250549316, "learning_rate": 8.07241379310345e-07, "loss": 0.142, "step": 253300 }, { "epoch": 2.49, "grad_norm": 2.4417169094085693, "learning_rate": 8.068103448275863e-07, "loss": 0.2007, "step": 253325 }, { "epoch": 2.49, "grad_norm": 9.653705596923828, "learning_rate": 8.063793103448277e-07, "loss": 0.0844, "step": 253350 }, { "epoch": 2.49, "grad_norm": 4.337924957275391, "learning_rate": 8.05948275862069e-07, "loss": 0.1545, "step": 253375 }, { "epoch": 2.49, "grad_norm": 8.88379192352295, "learning_rate": 8.055172413793104e-07, "loss": 0.0955, "step": 253400 }, { "epoch": 2.49, "grad_norm": 3.26310658454895, "learning_rate": 8.050862068965517e-07, "loss": 0.1617, "step": 253425 }, { "epoch": 2.49, "grad_norm": 15.449193954467773, "learning_rate": 8.046551724137931e-07, "loss": 0.1052, "step": 253450 }, { "epoch": 2.49, "grad_norm": 3.9822661876678467, "learning_rate": 8.042241379310345e-07, "loss": 0.1926, "step": 253475 }, { "epoch": 2.49, "grad_norm": 8.232183456420898, "learning_rate": 8.03793103448276e-07, "loss": 0.0863, "step": 253500 }, { "epoch": 2.49, "grad_norm": 2.2693707942962646, "learning_rate": 8.033620689655173e-07, "loss": 0.1402, "step": 253525 }, { "epoch": 2.49, "grad_norm": 9.980506896972656, "learning_rate": 8.029310344827587e-07, "loss": 0.1035, "step": 253550 }, { "epoch": 2.49, "grad_norm": 2.5122194290161133, "learning_rate": 8.025e-07, "loss": 0.1665, "step": 253575 }, { "epoch": 2.49, "grad_norm": 13.352156639099121, "learning_rate": 8.020689655172414e-07, "loss": 0.0982, "step": 253600 }, { "epoch": 2.49, "grad_norm": 0.45901238918304443, "learning_rate": 8.016379310344829e-07, "loss": 0.2, "step": 253625 }, { "epoch": 2.49, "grad_norm": 9.339624404907227, "learning_rate": 8.012068965517242e-07, "loss": 0.0905, "step": 253650 }, { "epoch": 2.49, "grad_norm": 7.813998699188232, "learning_rate": 8.007758620689656e-07, "loss": 0.2105, "step": 253675 }, { "epoch": 2.49, "grad_norm": 14.784506797790527, "learning_rate": 8.003448275862069e-07, "loss": 0.1023, "step": 253700 }, { "epoch": 2.49, "grad_norm": 9.197792053222656, "learning_rate": 7.999137931034484e-07, "loss": 0.202, "step": 253725 }, { "epoch": 2.49, "grad_norm": 11.03115177154541, "learning_rate": 7.994827586206898e-07, "loss": 0.0844, "step": 253750 }, { "epoch": 2.5, "grad_norm": 5.400504112243652, "learning_rate": 7.990517241379312e-07, "loss": 0.1764, "step": 253775 }, { "epoch": 2.5, "grad_norm": 7.439888000488281, "learning_rate": 7.986206896551725e-07, "loss": 0.0941, "step": 253800 }, { "epoch": 2.5, "grad_norm": 3.5715789794921875, "learning_rate": 7.981896551724139e-07, "loss": 0.1684, "step": 253825 }, { "epoch": 2.5, "grad_norm": 7.837950706481934, "learning_rate": 7.977586206896552e-07, "loss": 0.1091, "step": 253850 }, { "epoch": 2.5, "grad_norm": 3.433957099914551, "learning_rate": 7.973275862068966e-07, "loss": 0.142, "step": 253875 }, { "epoch": 2.5, "grad_norm": 10.062509536743164, "learning_rate": 7.968965517241379e-07, "loss": 0.0981, "step": 253900 }, { "epoch": 2.5, "grad_norm": 5.545358180999756, "learning_rate": 7.964655172413794e-07, "loss": 0.1727, "step": 253925 }, { "epoch": 2.5, "grad_norm": 13.537099838256836, "learning_rate": 7.960344827586207e-07, "loss": 0.1148, "step": 253950 }, { "epoch": 2.5, "grad_norm": 2.5892043113708496, "learning_rate": 7.956206896551724e-07, "loss": 0.1708, "step": 253975 }, { "epoch": 2.5, "grad_norm": 11.366560935974121, "learning_rate": 7.95189655172414e-07, "loss": 0.0918, "step": 254000 }, { "epoch": 2.5, "grad_norm": 0.156910702586174, "learning_rate": 7.947586206896553e-07, "loss": 0.194, "step": 254025 }, { "epoch": 2.5, "grad_norm": 9.168098449707031, "learning_rate": 7.943275862068967e-07, "loss": 0.0988, "step": 254050 }, { "epoch": 2.5, "grad_norm": 0.9914923906326294, "learning_rate": 7.93896551724138e-07, "loss": 0.1664, "step": 254075 }, { "epoch": 2.5, "grad_norm": 13.317317962646484, "learning_rate": 7.934655172413794e-07, "loss": 0.1025, "step": 254100 }, { "epoch": 2.5, "grad_norm": 4.488197326660156, "learning_rate": 7.930344827586207e-07, "loss": 0.1641, "step": 254125 }, { "epoch": 2.5, "grad_norm": 11.799986839294434, "learning_rate": 7.926034482758621e-07, "loss": 0.1005, "step": 254150 }, { "epoch": 2.5, "grad_norm": 5.512648582458496, "learning_rate": 7.921724137931035e-07, "loss": 0.1613, "step": 254175 }, { "epoch": 2.5, "grad_norm": 7.390122413635254, "learning_rate": 7.917413793103449e-07, "loss": 0.0993, "step": 254200 }, { "epoch": 2.5, "grad_norm": 11.631023406982422, "learning_rate": 7.913103448275863e-07, "loss": 0.1649, "step": 254225 }, { "epoch": 2.5, "grad_norm": 13.007915496826172, "learning_rate": 7.908793103448277e-07, "loss": 0.1023, "step": 254250 }, { "epoch": 2.5, "grad_norm": 2.0469202995300293, "learning_rate": 7.90448275862069e-07, "loss": 0.1531, "step": 254275 }, { "epoch": 2.5, "grad_norm": 13.278791427612305, "learning_rate": 7.900172413793105e-07, "loss": 0.1098, "step": 254300 }, { "epoch": 2.5, "grad_norm": 3.460278034210205, "learning_rate": 7.895862068965518e-07, "loss": 0.1759, "step": 254325 }, { "epoch": 2.5, "grad_norm": 9.995479583740234, "learning_rate": 7.891551724137932e-07, "loss": 0.0975, "step": 254350 }, { "epoch": 2.5, "grad_norm": 7.206801891326904, "learning_rate": 7.887241379310345e-07, "loss": 0.2334, "step": 254375 }, { "epoch": 2.5, "grad_norm": 13.521387100219727, "learning_rate": 7.882931034482759e-07, "loss": 0.0924, "step": 254400 }, { "epoch": 2.5, "grad_norm": 3.7192764282226562, "learning_rate": 7.878620689655172e-07, "loss": 0.1843, "step": 254425 }, { "epoch": 2.5, "grad_norm": 9.591487884521484, "learning_rate": 7.874310344827586e-07, "loss": 0.1043, "step": 254450 }, { "epoch": 2.5, "grad_norm": 0.4909493923187256, "learning_rate": 7.870000000000002e-07, "loss": 0.1671, "step": 254475 }, { "epoch": 2.5, "grad_norm": 11.287385940551758, "learning_rate": 7.865689655172415e-07, "loss": 0.0888, "step": 254500 }, { "epoch": 2.5, "grad_norm": 2.1480090618133545, "learning_rate": 7.861379310344829e-07, "loss": 0.1782, "step": 254525 }, { "epoch": 2.5, "grad_norm": 7.271322727203369, "learning_rate": 7.857068965517242e-07, "loss": 0.1118, "step": 254550 }, { "epoch": 2.5, "grad_norm": 6.483983039855957, "learning_rate": 7.852758620689656e-07, "loss": 0.1863, "step": 254575 }, { "epoch": 2.5, "grad_norm": 13.150079727172852, "learning_rate": 7.848448275862069e-07, "loss": 0.093, "step": 254600 }, { "epoch": 2.5, "grad_norm": 6.8121514320373535, "learning_rate": 7.844137931034484e-07, "loss": 0.1785, "step": 254625 }, { "epoch": 2.5, "grad_norm": 11.534154891967773, "learning_rate": 7.839827586206897e-07, "loss": 0.0955, "step": 254650 }, { "epoch": 2.5, "grad_norm": 0.3382224142551422, "learning_rate": 7.835517241379311e-07, "loss": 0.2182, "step": 254675 }, { "epoch": 2.5, "grad_norm": 20.78724479675293, "learning_rate": 7.831206896551725e-07, "loss": 0.1202, "step": 254700 }, { "epoch": 2.5, "grad_norm": 4.310933589935303, "learning_rate": 7.826896551724139e-07, "loss": 0.1681, "step": 254725 }, { "epoch": 2.5, "grad_norm": 6.833812236785889, "learning_rate": 7.822586206896553e-07, "loss": 0.0836, "step": 254750 }, { "epoch": 2.5, "grad_norm": 5.8636555671691895, "learning_rate": 7.818275862068967e-07, "loss": 0.1973, "step": 254775 }, { "epoch": 2.51, "grad_norm": 7.615400791168213, "learning_rate": 7.81396551724138e-07, "loss": 0.0903, "step": 254800 }, { "epoch": 2.51, "grad_norm": 1.7447527647018433, "learning_rate": 7.809655172413794e-07, "loss": 0.16, "step": 254825 }, { "epoch": 2.51, "grad_norm": 8.755663871765137, "learning_rate": 7.805344827586207e-07, "loss": 0.0838, "step": 254850 }, { "epoch": 2.51, "grad_norm": 2.9911582469940186, "learning_rate": 7.801034482758621e-07, "loss": 0.157, "step": 254875 }, { "epoch": 2.51, "grad_norm": 14.254398345947266, "learning_rate": 7.796724137931034e-07, "loss": 0.0853, "step": 254900 }, { "epoch": 2.51, "grad_norm": 3.8649139404296875, "learning_rate": 7.792413793103448e-07, "loss": 0.1934, "step": 254925 }, { "epoch": 2.51, "grad_norm": 10.333308219909668, "learning_rate": 7.788103448275863e-07, "loss": 0.0848, "step": 254950 }, { "epoch": 2.51, "grad_norm": 6.613531589508057, "learning_rate": 7.783793103448277e-07, "loss": 0.1454, "step": 254975 }, { "epoch": 2.51, "grad_norm": 9.555251121520996, "learning_rate": 7.77948275862069e-07, "loss": 0.0911, "step": 255000 }, { "epoch": 2.51, "grad_norm": 2.054863452911377, "learning_rate": 7.775172413793104e-07, "loss": 0.1456, "step": 255025 }, { "epoch": 2.51, "grad_norm": 4.544980525970459, "learning_rate": 7.770862068965517e-07, "loss": 0.1435, "step": 255050 }, { "epoch": 2.51, "grad_norm": 4.5261383056640625, "learning_rate": 7.766551724137932e-07, "loss": 0.1344, "step": 255075 }, { "epoch": 2.51, "grad_norm": 10.911783218383789, "learning_rate": 7.762241379310346e-07, "loss": 0.1025, "step": 255100 }, { "epoch": 2.51, "grad_norm": 3.220263719558716, "learning_rate": 7.757931034482759e-07, "loss": 0.1888, "step": 255125 }, { "epoch": 2.51, "grad_norm": 9.34064769744873, "learning_rate": 7.753620689655173e-07, "loss": 0.0921, "step": 255150 }, { "epoch": 2.51, "grad_norm": 3.774864673614502, "learning_rate": 7.749310344827587e-07, "loss": 0.1608, "step": 255175 }, { "epoch": 2.51, "grad_norm": 10.57157039642334, "learning_rate": 7.745000000000002e-07, "loss": 0.1109, "step": 255200 }, { "epoch": 2.51, "grad_norm": 6.262356281280518, "learning_rate": 7.740689655172415e-07, "loss": 0.184, "step": 255225 }, { "epoch": 2.51, "grad_norm": 7.069781303405762, "learning_rate": 7.736379310344829e-07, "loss": 0.0916, "step": 255250 }, { "epoch": 2.51, "grad_norm": 1.6724166870117188, "learning_rate": 7.732068965517242e-07, "loss": 0.1868, "step": 255275 }, { "epoch": 2.51, "grad_norm": 9.707403182983398, "learning_rate": 7.727758620689656e-07, "loss": 0.0759, "step": 255300 }, { "epoch": 2.51, "grad_norm": 3.546387195587158, "learning_rate": 7.723448275862069e-07, "loss": 0.2169, "step": 255325 }, { "epoch": 2.51, "grad_norm": 11.33749771118164, "learning_rate": 7.719137931034483e-07, "loss": 0.0881, "step": 255350 }, { "epoch": 2.51, "grad_norm": 7.428859233856201, "learning_rate": 7.714827586206896e-07, "loss": 0.1973, "step": 255375 }, { "epoch": 2.51, "grad_norm": 12.6555757522583, "learning_rate": 7.710517241379311e-07, "loss": 0.0931, "step": 255400 }, { "epoch": 2.51, "grad_norm": 4.487953186035156, "learning_rate": 7.706206896551725e-07, "loss": 0.1646, "step": 255425 }, { "epoch": 2.51, "grad_norm": 12.903512954711914, "learning_rate": 7.701896551724139e-07, "loss": 0.101, "step": 255450 }, { "epoch": 2.51, "grad_norm": 1.9197711944580078, "learning_rate": 7.697586206896552e-07, "loss": 0.1535, "step": 255475 }, { "epoch": 2.51, "grad_norm": 16.546606063842773, "learning_rate": 7.693275862068966e-07, "loss": 0.12, "step": 255500 }, { "epoch": 2.51, "grad_norm": 4.617104530334473, "learning_rate": 7.68896551724138e-07, "loss": 0.1756, "step": 255525 }, { "epoch": 2.51, "grad_norm": 10.501569747924805, "learning_rate": 7.684655172413794e-07, "loss": 0.0924, "step": 255550 }, { "epoch": 2.51, "grad_norm": 4.454726219177246, "learning_rate": 7.680344827586207e-07, "loss": 0.1914, "step": 255575 }, { "epoch": 2.51, "grad_norm": 11.498823165893555, "learning_rate": 7.676034482758621e-07, "loss": 0.1109, "step": 255600 }, { "epoch": 2.51, "grad_norm": 4.063804626464844, "learning_rate": 7.671724137931034e-07, "loss": 0.1748, "step": 255625 }, { "epoch": 2.51, "grad_norm": 8.94227123260498, "learning_rate": 7.667413793103449e-07, "loss": 0.1107, "step": 255650 }, { "epoch": 2.51, "grad_norm": 1.973187804222107, "learning_rate": 7.663103448275864e-07, "loss": 0.1479, "step": 255675 }, { "epoch": 2.51, "grad_norm": 6.654633045196533, "learning_rate": 7.658793103448277e-07, "loss": 0.1042, "step": 255700 }, { "epoch": 2.51, "grad_norm": 6.978112697601318, "learning_rate": 7.654482758620691e-07, "loss": 0.1692, "step": 255725 }, { "epoch": 2.51, "grad_norm": 8.272438049316406, "learning_rate": 7.650172413793104e-07, "loss": 0.1008, "step": 255750 }, { "epoch": 2.51, "grad_norm": 5.903392314910889, "learning_rate": 7.645862068965518e-07, "loss": 0.1826, "step": 255775 }, { "epoch": 2.52, "grad_norm": 11.918399810791016, "learning_rate": 7.641551724137931e-07, "loss": 0.0731, "step": 255800 }, { "epoch": 2.52, "grad_norm": 4.04695987701416, "learning_rate": 7.637241379310345e-07, "loss": 0.1724, "step": 255825 }, { "epoch": 2.52, "grad_norm": 9.581595420837402, "learning_rate": 7.632931034482759e-07, "loss": 0.1118, "step": 255850 }, { "epoch": 2.52, "grad_norm": 1.0690104961395264, "learning_rate": 7.628620689655174e-07, "loss": 0.1142, "step": 255875 }, { "epoch": 2.52, "grad_norm": 9.159611701965332, "learning_rate": 7.624310344827587e-07, "loss": 0.0863, "step": 255900 }, { "epoch": 2.52, "grad_norm": 1.9383245706558228, "learning_rate": 7.620000000000001e-07, "loss": 0.1652, "step": 255925 }, { "epoch": 2.52, "grad_norm": 9.865768432617188, "learning_rate": 7.615689655172414e-07, "loss": 0.0886, "step": 255950 }, { "epoch": 2.52, "grad_norm": 3.5496413707733154, "learning_rate": 7.611379310344829e-07, "loss": 0.157, "step": 255975 }, { "epoch": 2.52, "grad_norm": 9.257547378540039, "learning_rate": 7.607068965517242e-07, "loss": 0.08, "step": 256000 }, { "epoch": 2.52, "grad_norm": 4.58927059173584, "learning_rate": 7.602758620689656e-07, "loss": 0.1696, "step": 256025 }, { "epoch": 2.52, "grad_norm": 13.81490707397461, "learning_rate": 7.598448275862069e-07, "loss": 0.0972, "step": 256050 }, { "epoch": 2.52, "grad_norm": 0.17282092571258545, "learning_rate": 7.594137931034483e-07, "loss": 0.1793, "step": 256075 }, { "epoch": 2.52, "grad_norm": 9.961833953857422, "learning_rate": 7.589827586206896e-07, "loss": 0.129, "step": 256100 }, { "epoch": 2.52, "grad_norm": 3.686575174331665, "learning_rate": 7.585689655172414e-07, "loss": 0.1654, "step": 256125 }, { "epoch": 2.52, "grad_norm": 6.830992221832275, "learning_rate": 7.581379310344829e-07, "loss": 0.0794, "step": 256150 }, { "epoch": 2.52, "grad_norm": 5.21045446395874, "learning_rate": 7.577068965517242e-07, "loss": 0.1438, "step": 256175 }, { "epoch": 2.52, "grad_norm": 6.207311630249023, "learning_rate": 7.572758620689656e-07, "loss": 0.0772, "step": 256200 }, { "epoch": 2.52, "grad_norm": 7.831894397735596, "learning_rate": 7.56844827586207e-07, "loss": 0.1905, "step": 256225 }, { "epoch": 2.52, "grad_norm": 10.234587669372559, "learning_rate": 7.564137931034484e-07, "loss": 0.0662, "step": 256250 }, { "epoch": 2.52, "grad_norm": 0.2745361626148224, "learning_rate": 7.559827586206897e-07, "loss": 0.1999, "step": 256275 }, { "epoch": 2.52, "grad_norm": 7.142472267150879, "learning_rate": 7.555517241379311e-07, "loss": 0.1004, "step": 256300 }, { "epoch": 2.52, "grad_norm": 6.544914722442627, "learning_rate": 7.551206896551724e-07, "loss": 0.1615, "step": 256325 }, { "epoch": 2.52, "grad_norm": 5.997125148773193, "learning_rate": 7.546896551724138e-07, "loss": 0.0784, "step": 256350 }, { "epoch": 2.52, "grad_norm": 1.1255205869674683, "learning_rate": 7.542586206896553e-07, "loss": 0.2231, "step": 256375 }, { "epoch": 2.52, "grad_norm": 7.7773051261901855, "learning_rate": 7.538275862068967e-07, "loss": 0.0985, "step": 256400 }, { "epoch": 2.52, "grad_norm": 4.534698009490967, "learning_rate": 7.53396551724138e-07, "loss": 0.198, "step": 256425 }, { "epoch": 2.52, "grad_norm": 11.80140209197998, "learning_rate": 7.529655172413794e-07, "loss": 0.1514, "step": 256450 }, { "epoch": 2.52, "grad_norm": 1.4842151403427124, "learning_rate": 7.525344827586207e-07, "loss": 0.1689, "step": 256475 }, { "epoch": 2.52, "grad_norm": 10.872756958007812, "learning_rate": 7.521034482758621e-07, "loss": 0.0975, "step": 256500 }, { "epoch": 2.52, "grad_norm": 3.8130550384521484, "learning_rate": 7.516724137931034e-07, "loss": 0.1726, "step": 256525 }, { "epoch": 2.52, "grad_norm": 8.824122428894043, "learning_rate": 7.512413793103449e-07, "loss": 0.0951, "step": 256550 }, { "epoch": 2.52, "grad_norm": 2.8749120235443115, "learning_rate": 7.508103448275863e-07, "loss": 0.1386, "step": 256575 }, { "epoch": 2.52, "grad_norm": 17.479015350341797, "learning_rate": 7.503793103448276e-07, "loss": 0.0843, "step": 256600 }, { "epoch": 2.52, "grad_norm": 5.9609880447387695, "learning_rate": 7.499482758620691e-07, "loss": 0.1909, "step": 256625 }, { "epoch": 2.52, "grad_norm": 10.20399284362793, "learning_rate": 7.495172413793104e-07, "loss": 0.0905, "step": 256650 }, { "epoch": 2.52, "grad_norm": 3.4550678730010986, "learning_rate": 7.490862068965519e-07, "loss": 0.1368, "step": 256675 }, { "epoch": 2.52, "grad_norm": 12.698596954345703, "learning_rate": 7.486551724137932e-07, "loss": 0.083, "step": 256700 }, { "epoch": 2.52, "grad_norm": 4.570207118988037, "learning_rate": 7.482241379310346e-07, "loss": 0.1694, "step": 256725 }, { "epoch": 2.52, "grad_norm": 13.133756637573242, "learning_rate": 7.477931034482759e-07, "loss": 0.1023, "step": 256750 }, { "epoch": 2.52, "grad_norm": 4.1862568855285645, "learning_rate": 7.473620689655173e-07, "loss": 0.1859, "step": 256775 }, { "epoch": 2.52, "grad_norm": 10.51638412475586, "learning_rate": 7.469310344827586e-07, "loss": 0.0874, "step": 256800 }, { "epoch": 2.53, "grad_norm": 5.022136211395264, "learning_rate": 7.465e-07, "loss": 0.1693, "step": 256825 }, { "epoch": 2.53, "grad_norm": 17.487783432006836, "learning_rate": 7.460689655172415e-07, "loss": 0.085, "step": 256850 }, { "epoch": 2.53, "grad_norm": 3.569392204284668, "learning_rate": 7.456379310344829e-07, "loss": 0.1858, "step": 256875 }, { "epoch": 2.53, "grad_norm": 7.9844770431518555, "learning_rate": 7.452068965517242e-07, "loss": 0.1229, "step": 256900 }, { "epoch": 2.53, "grad_norm": 2.3547303676605225, "learning_rate": 7.447758620689656e-07, "loss": 0.16, "step": 256925 }, { "epoch": 2.53, "grad_norm": 11.9663667678833, "learning_rate": 7.443448275862069e-07, "loss": 0.0949, "step": 256950 }, { "epoch": 2.53, "grad_norm": 1.9696520566940308, "learning_rate": 7.439137931034483e-07, "loss": 0.1599, "step": 256975 }, { "epoch": 2.53, "grad_norm": 12.473143577575684, "learning_rate": 7.434827586206897e-07, "loss": 0.0949, "step": 257000 }, { "epoch": 2.53, "grad_norm": 5.137742519378662, "learning_rate": 7.430517241379311e-07, "loss": 0.1882, "step": 257025 }, { "epoch": 2.53, "grad_norm": 10.914069175720215, "learning_rate": 7.426206896551724e-07, "loss": 0.0826, "step": 257050 }, { "epoch": 2.53, "grad_norm": 0.7807998061180115, "learning_rate": 7.421896551724138e-07, "loss": 0.1811, "step": 257075 }, { "epoch": 2.53, "grad_norm": 9.983339309692383, "learning_rate": 7.417586206896552e-07, "loss": 0.0943, "step": 257100 }, { "epoch": 2.53, "grad_norm": 3.1594910621643066, "learning_rate": 7.413275862068967e-07, "loss": 0.1706, "step": 257125 }, { "epoch": 2.53, "grad_norm": 15.107118606567383, "learning_rate": 7.408965517241381e-07, "loss": 0.115, "step": 257150 }, { "epoch": 2.53, "grad_norm": 0.10611565411090851, "learning_rate": 7.404655172413794e-07, "loss": 0.174, "step": 257175 }, { "epoch": 2.53, "grad_norm": 8.320733070373535, "learning_rate": 7.400344827586208e-07, "loss": 0.0937, "step": 257200 }, { "epoch": 2.53, "grad_norm": 7.9710259437561035, "learning_rate": 7.396034482758621e-07, "loss": 0.1903, "step": 257225 }, { "epoch": 2.53, "grad_norm": 11.782301902770996, "learning_rate": 7.391724137931035e-07, "loss": 0.0807, "step": 257250 }, { "epoch": 2.53, "grad_norm": 1.1441717147827148, "learning_rate": 7.387413793103448e-07, "loss": 0.1567, "step": 257275 }, { "epoch": 2.53, "grad_norm": 9.44555950164795, "learning_rate": 7.383103448275862e-07, "loss": 0.0946, "step": 257300 }, { "epoch": 2.53, "grad_norm": 4.110232353210449, "learning_rate": 7.378793103448277e-07, "loss": 0.1952, "step": 257325 }, { "epoch": 2.53, "grad_norm": 8.771865844726562, "learning_rate": 7.374482758620691e-07, "loss": 0.1081, "step": 257350 }, { "epoch": 2.53, "grad_norm": 5.875725269317627, "learning_rate": 7.370172413793104e-07, "loss": 0.1635, "step": 257375 }, { "epoch": 2.53, "grad_norm": 14.03309440612793, "learning_rate": 7.365862068965518e-07, "loss": 0.1066, "step": 257400 }, { "epoch": 2.53, "grad_norm": 3.438002109527588, "learning_rate": 7.361551724137931e-07, "loss": 0.2056, "step": 257425 }, { "epoch": 2.53, "grad_norm": 9.57994556427002, "learning_rate": 7.357241379310346e-07, "loss": 0.1265, "step": 257450 }, { "epoch": 2.53, "grad_norm": 1.570738673210144, "learning_rate": 7.352931034482759e-07, "loss": 0.1278, "step": 257475 }, { "epoch": 2.53, "grad_norm": 15.562185287475586, "learning_rate": 7.348620689655173e-07, "loss": 0.1107, "step": 257500 }, { "epoch": 2.53, "grad_norm": 6.354142665863037, "learning_rate": 7.344310344827586e-07, "loss": 0.1764, "step": 257525 }, { "epoch": 2.53, "grad_norm": 6.34661340713501, "learning_rate": 7.340000000000001e-07, "loss": 0.104, "step": 257550 }, { "epoch": 2.53, "grad_norm": 4.515473365783691, "learning_rate": 7.335689655172415e-07, "loss": 0.1762, "step": 257575 }, { "epoch": 2.53, "grad_norm": 11.882220268249512, "learning_rate": 7.331379310344829e-07, "loss": 0.1093, "step": 257600 }, { "epoch": 2.53, "grad_norm": 6.822022438049316, "learning_rate": 7.327068965517242e-07, "loss": 0.2254, "step": 257625 }, { "epoch": 2.53, "grad_norm": 7.975339889526367, "learning_rate": 7.322758620689656e-07, "loss": 0.0763, "step": 257650 }, { "epoch": 2.53, "grad_norm": 6.799055576324463, "learning_rate": 7.318448275862069e-07, "loss": 0.178, "step": 257675 }, { "epoch": 2.53, "grad_norm": 7.7880144119262695, "learning_rate": 7.314137931034483e-07, "loss": 0.0759, "step": 257700 }, { "epoch": 2.53, "grad_norm": 3.1970953941345215, "learning_rate": 7.309827586206896e-07, "loss": 0.1606, "step": 257725 }, { "epoch": 2.53, "grad_norm": 8.520716667175293, "learning_rate": 7.30551724137931e-07, "loss": 0.1006, "step": 257750 }, { "epoch": 2.53, "grad_norm": 4.32033109664917, "learning_rate": 7.301206896551725e-07, "loss": 0.1589, "step": 257775 }, { "epoch": 2.53, "grad_norm": 23.3703670501709, "learning_rate": 7.296896551724139e-07, "loss": 0.1136, "step": 257800 }, { "epoch": 2.53, "grad_norm": 2.723890542984009, "learning_rate": 7.292586206896553e-07, "loss": 0.1937, "step": 257825 }, { "epoch": 2.54, "grad_norm": 12.859326362609863, "learning_rate": 7.288275862068966e-07, "loss": 0.0892, "step": 257850 }, { "epoch": 2.54, "grad_norm": 4.560240745544434, "learning_rate": 7.28396551724138e-07, "loss": 0.183, "step": 257875 }, { "epoch": 2.54, "grad_norm": 9.11144733428955, "learning_rate": 7.279655172413794e-07, "loss": 0.0878, "step": 257900 }, { "epoch": 2.54, "grad_norm": 4.202302932739258, "learning_rate": 7.275344827586208e-07, "loss": 0.1688, "step": 257925 }, { "epoch": 2.54, "grad_norm": 15.862324714660645, "learning_rate": 7.271034482758621e-07, "loss": 0.1071, "step": 257950 }, { "epoch": 2.54, "grad_norm": 6.352775573730469, "learning_rate": 7.266724137931035e-07, "loss": 0.2184, "step": 257975 }, { "epoch": 2.54, "grad_norm": 16.161640167236328, "learning_rate": 7.262413793103448e-07, "loss": 0.0965, "step": 258000 }, { "epoch": 2.54, "grad_norm": 7.272350311279297, "learning_rate": 7.258103448275864e-07, "loss": 0.1762, "step": 258025 }, { "epoch": 2.54, "grad_norm": 10.995333671569824, "learning_rate": 7.253793103448277e-07, "loss": 0.0793, "step": 258050 }, { "epoch": 2.54, "grad_norm": 2.6373140811920166, "learning_rate": 7.249482758620691e-07, "loss": 0.1561, "step": 258075 }, { "epoch": 2.54, "grad_norm": 15.471944808959961, "learning_rate": 7.245172413793104e-07, "loss": 0.1182, "step": 258100 }, { "epoch": 2.54, "grad_norm": 1.279740810394287, "learning_rate": 7.241034482758621e-07, "loss": 0.1895, "step": 258125 }, { "epoch": 2.54, "grad_norm": 9.043105125427246, "learning_rate": 7.236724137931036e-07, "loss": 0.105, "step": 258150 }, { "epoch": 2.54, "grad_norm": 3.6341986656188965, "learning_rate": 7.232413793103449e-07, "loss": 0.2146, "step": 258175 }, { "epoch": 2.54, "grad_norm": 12.43305492401123, "learning_rate": 7.228103448275863e-07, "loss": 0.0775, "step": 258200 }, { "epoch": 2.54, "grad_norm": 2.8233816623687744, "learning_rate": 7.223793103448276e-07, "loss": 0.193, "step": 258225 }, { "epoch": 2.54, "grad_norm": 12.17225456237793, "learning_rate": 7.21948275862069e-07, "loss": 0.0977, "step": 258250 }, { "epoch": 2.54, "grad_norm": 4.535813808441162, "learning_rate": 7.215172413793103e-07, "loss": 0.1804, "step": 258275 }, { "epoch": 2.54, "grad_norm": 13.499894142150879, "learning_rate": 7.210862068965519e-07, "loss": 0.0806, "step": 258300 }, { "epoch": 2.54, "grad_norm": 3.0480101108551025, "learning_rate": 7.206551724137932e-07, "loss": 0.1665, "step": 258325 }, { "epoch": 2.54, "grad_norm": 8.659439086914062, "learning_rate": 7.202241379310346e-07, "loss": 0.0904, "step": 258350 }, { "epoch": 2.54, "grad_norm": 2.259251832962036, "learning_rate": 7.197931034482759e-07, "loss": 0.1607, "step": 258375 }, { "epoch": 2.54, "grad_norm": 9.63401985168457, "learning_rate": 7.193620689655173e-07, "loss": 0.0957, "step": 258400 }, { "epoch": 2.54, "grad_norm": 1.2991617918014526, "learning_rate": 7.189310344827586e-07, "loss": 0.1563, "step": 258425 }, { "epoch": 2.54, "grad_norm": 10.516326904296875, "learning_rate": 7.185e-07, "loss": 0.0836, "step": 258450 }, { "epoch": 2.54, "grad_norm": 3.178530216217041, "learning_rate": 7.180689655172414e-07, "loss": 0.1629, "step": 258475 }, { "epoch": 2.54, "grad_norm": 8.695456504821777, "learning_rate": 7.176379310344828e-07, "loss": 0.0809, "step": 258500 }, { "epoch": 2.54, "grad_norm": 0.41950735449790955, "learning_rate": 7.172068965517242e-07, "loss": 0.1601, "step": 258525 }, { "epoch": 2.54, "grad_norm": 7.393265724182129, "learning_rate": 7.167758620689656e-07, "loss": 0.086, "step": 258550 }, { "epoch": 2.54, "grad_norm": 4.258738994598389, "learning_rate": 7.163448275862069e-07, "loss": 0.1737, "step": 258575 }, { "epoch": 2.54, "grad_norm": 5.288272857666016, "learning_rate": 7.159137931034484e-07, "loss": 0.1242, "step": 258600 }, { "epoch": 2.54, "grad_norm": 4.579852104187012, "learning_rate": 7.154827586206898e-07, "loss": 0.1857, "step": 258625 }, { "epoch": 2.54, "grad_norm": 13.07597827911377, "learning_rate": 7.150517241379311e-07, "loss": 0.1224, "step": 258650 }, { "epoch": 2.54, "grad_norm": 3.65142560005188, "learning_rate": 7.146206896551725e-07, "loss": 0.1711, "step": 258675 }, { "epoch": 2.54, "grad_norm": 10.245556831359863, "learning_rate": 7.141896551724138e-07, "loss": 0.1148, "step": 258700 }, { "epoch": 2.54, "grad_norm": 2.0222012996673584, "learning_rate": 7.137586206896552e-07, "loss": 0.1509, "step": 258725 }, { "epoch": 2.54, "grad_norm": 11.142765998840332, "learning_rate": 7.133275862068965e-07, "loss": 0.115, "step": 258750 }, { "epoch": 2.54, "grad_norm": 5.954310894012451, "learning_rate": 7.128965517241381e-07, "loss": 0.1711, "step": 258775 }, { "epoch": 2.54, "grad_norm": 7.714773654937744, "learning_rate": 7.124655172413794e-07, "loss": 0.1041, "step": 258800 }, { "epoch": 2.54, "grad_norm": 1.6494466066360474, "learning_rate": 7.120344827586208e-07, "loss": 0.168, "step": 258825 }, { "epoch": 2.55, "grad_norm": 8.745320320129395, "learning_rate": 7.116034482758621e-07, "loss": 0.0884, "step": 258850 }, { "epoch": 2.55, "grad_norm": 3.1277244091033936, "learning_rate": 7.111724137931035e-07, "loss": 0.1942, "step": 258875 }, { "epoch": 2.55, "grad_norm": 12.61888313293457, "learning_rate": 7.107413793103448e-07, "loss": 0.0963, "step": 258900 }, { "epoch": 2.55, "grad_norm": 2.3710224628448486, "learning_rate": 7.103103448275863e-07, "loss": 0.1284, "step": 258925 }, { "epoch": 2.55, "grad_norm": 10.312837600708008, "learning_rate": 7.098793103448276e-07, "loss": 0.1035, "step": 258950 }, { "epoch": 2.55, "grad_norm": 3.510746479034424, "learning_rate": 7.09448275862069e-07, "loss": 0.1462, "step": 258975 }, { "epoch": 2.55, "grad_norm": 14.818126678466797, "learning_rate": 7.090172413793104e-07, "loss": 0.0923, "step": 259000 }, { "epoch": 2.55, "grad_norm": 1.1374449729919434, "learning_rate": 7.085862068965518e-07, "loss": 0.1528, "step": 259025 }, { "epoch": 2.55, "grad_norm": 6.664443016052246, "learning_rate": 7.081551724137932e-07, "loss": 0.0814, "step": 259050 }, { "epoch": 2.55, "grad_norm": 4.338473320007324, "learning_rate": 7.077241379310346e-07, "loss": 0.2091, "step": 259075 }, { "epoch": 2.55, "grad_norm": 20.290424346923828, "learning_rate": 7.072931034482759e-07, "loss": 0.0961, "step": 259100 }, { "epoch": 2.55, "grad_norm": 4.09880256652832, "learning_rate": 7.068620689655173e-07, "loss": 0.2138, "step": 259125 }, { "epoch": 2.55, "grad_norm": 11.85567569732666, "learning_rate": 7.064310344827586e-07, "loss": 0.111, "step": 259150 }, { "epoch": 2.55, "grad_norm": 3.4040300846099854, "learning_rate": 7.06e-07, "loss": 0.1607, "step": 259175 }, { "epoch": 2.55, "grad_norm": 8.315666198730469, "learning_rate": 7.055689655172413e-07, "loss": 0.0728, "step": 259200 }, { "epoch": 2.55, "grad_norm": 1.9832528829574585, "learning_rate": 7.051379310344827e-07, "loss": 0.1753, "step": 259225 }, { "epoch": 2.55, "grad_norm": 15.313522338867188, "learning_rate": 7.047068965517243e-07, "loss": 0.1309, "step": 259250 }, { "epoch": 2.55, "grad_norm": 4.260916709899902, "learning_rate": 7.042758620689656e-07, "loss": 0.1559, "step": 259275 }, { "epoch": 2.55, "grad_norm": 11.635127067565918, "learning_rate": 7.03844827586207e-07, "loss": 0.07, "step": 259300 }, { "epoch": 2.55, "grad_norm": 2.844816207885742, "learning_rate": 7.034137931034483e-07, "loss": 0.2221, "step": 259325 }, { "epoch": 2.55, "grad_norm": 9.285008430480957, "learning_rate": 7.029827586206897e-07, "loss": 0.1066, "step": 259350 }, { "epoch": 2.55, "grad_norm": 6.1687188148498535, "learning_rate": 7.025517241379311e-07, "loss": 0.1898, "step": 259375 }, { "epoch": 2.55, "grad_norm": 9.823017120361328, "learning_rate": 7.021206896551725e-07, "loss": 0.0762, "step": 259400 }, { "epoch": 2.55, "grad_norm": 8.513528823852539, "learning_rate": 7.016896551724138e-07, "loss": 0.173, "step": 259425 }, { "epoch": 2.55, "grad_norm": 5.320058345794678, "learning_rate": 7.012586206896552e-07, "loss": 0.0876, "step": 259450 }, { "epoch": 2.55, "grad_norm": 3.6394505500793457, "learning_rate": 7.008275862068966e-07, "loss": 0.174, "step": 259475 }, { "epoch": 2.55, "grad_norm": 12.893271446228027, "learning_rate": 7.003965517241381e-07, "loss": 0.0909, "step": 259500 }, { "epoch": 2.55, "grad_norm": 4.857639312744141, "learning_rate": 6.999655172413794e-07, "loss": 0.1881, "step": 259525 }, { "epoch": 2.55, "grad_norm": 12.997801780700684, "learning_rate": 6.995344827586208e-07, "loss": 0.1014, "step": 259550 }, { "epoch": 2.55, "grad_norm": 4.917912483215332, "learning_rate": 6.991034482758621e-07, "loss": 0.1684, "step": 259575 }, { "epoch": 2.55, "grad_norm": 12.26303768157959, "learning_rate": 6.986724137931035e-07, "loss": 0.0931, "step": 259600 }, { "epoch": 2.55, "grad_norm": 0.2087349146604538, "learning_rate": 6.982413793103448e-07, "loss": 0.208, "step": 259625 }, { "epoch": 2.55, "grad_norm": 15.124650001525879, "learning_rate": 6.978103448275862e-07, "loss": 0.0951, "step": 259650 }, { "epoch": 2.55, "grad_norm": 4.142449855804443, "learning_rate": 6.973793103448275e-07, "loss": 0.1815, "step": 259675 }, { "epoch": 2.55, "grad_norm": 9.978788375854492, "learning_rate": 6.969482758620691e-07, "loss": 0.0899, "step": 259700 }, { "epoch": 2.55, "grad_norm": 5.190655708312988, "learning_rate": 6.965172413793104e-07, "loss": 0.2119, "step": 259725 }, { "epoch": 2.55, "grad_norm": 14.84843921661377, "learning_rate": 6.960862068965518e-07, "loss": 0.1147, "step": 259750 }, { "epoch": 2.55, "grad_norm": 6.770528793334961, "learning_rate": 6.956551724137932e-07, "loss": 0.1668, "step": 259775 }, { "epoch": 2.55, "grad_norm": 4.281104564666748, "learning_rate": 6.952241379310345e-07, "loss": 0.1101, "step": 259800 }, { "epoch": 2.55, "grad_norm": 6.994060516357422, "learning_rate": 6.94793103448276e-07, "loss": 0.159, "step": 259825 }, { "epoch": 2.55, "grad_norm": 12.300970077514648, "learning_rate": 6.943620689655173e-07, "loss": 0.1318, "step": 259850 }, { "epoch": 2.56, "grad_norm": 9.151687622070312, "learning_rate": 6.939310344827587e-07, "loss": 0.1769, "step": 259875 }, { "epoch": 2.56, "grad_norm": 9.016443252563477, "learning_rate": 6.935e-07, "loss": 0.0962, "step": 259900 }, { "epoch": 2.56, "grad_norm": 3.417917013168335, "learning_rate": 6.930689655172414e-07, "loss": 0.182, "step": 259925 }, { "epoch": 2.56, "grad_norm": 12.256309509277344, "learning_rate": 6.926379310344829e-07, "loss": 0.1053, "step": 259950 }, { "epoch": 2.56, "grad_norm": 7.138642311096191, "learning_rate": 6.922068965517243e-07, "loss": 0.1565, "step": 259975 }, { "epoch": 2.56, "grad_norm": 13.268507957458496, "learning_rate": 6.917758620689656e-07, "loss": 0.0959, "step": 260000 }, { "epoch": 2.56, "eval_loss": 0.6183231472969055, "eval_runtime": 5941.7378, "eval_samples_per_second": 1.593, "eval_steps_per_second": 0.199, "eval_wer": 0.11565698834738943, "step": 260000 }, { "epoch": 2.56, "grad_norm": 6.267670154571533, "learning_rate": 6.91344827586207e-07, "loss": 0.1643, "step": 260025 }, { "epoch": 2.56, "grad_norm": 8.023737907409668, "learning_rate": 6.909137931034483e-07, "loss": 0.0966, "step": 260050 }, { "epoch": 2.56, "grad_norm": 6.08717155456543, "learning_rate": 6.904827586206897e-07, "loss": 0.2056, "step": 260075 }, { "epoch": 2.56, "grad_norm": 7.656101226806641, "learning_rate": 6.90051724137931e-07, "loss": 0.091, "step": 260100 }, { "epoch": 2.56, "grad_norm": 6.003302097320557, "learning_rate": 6.896206896551724e-07, "loss": 0.1482, "step": 260125 }, { "epoch": 2.56, "grad_norm": 8.43018913269043, "learning_rate": 6.891896551724138e-07, "loss": 0.0804, "step": 260150 }, { "epoch": 2.56, "grad_norm": 2.2734615802764893, "learning_rate": 6.887586206896553e-07, "loss": 0.1568, "step": 260175 }, { "epoch": 2.56, "grad_norm": 10.231352806091309, "learning_rate": 6.883275862068966e-07, "loss": 0.0897, "step": 260200 }, { "epoch": 2.56, "grad_norm": 4.536520004272461, "learning_rate": 6.87896551724138e-07, "loss": 0.1737, "step": 260225 }, { "epoch": 2.56, "grad_norm": 14.56501579284668, "learning_rate": 6.874655172413793e-07, "loss": 0.1176, "step": 260250 }, { "epoch": 2.56, "grad_norm": 5.275416851043701, "learning_rate": 6.870344827586208e-07, "loss": 0.1729, "step": 260275 }, { "epoch": 2.56, "grad_norm": 7.135717391967773, "learning_rate": 6.866034482758621e-07, "loss": 0.0939, "step": 260300 }, { "epoch": 2.56, "grad_norm": 7.00016450881958, "learning_rate": 6.861724137931035e-07, "loss": 0.1923, "step": 260325 }, { "epoch": 2.56, "grad_norm": 5.4773969650268555, "learning_rate": 6.857413793103448e-07, "loss": 0.1149, "step": 260350 }, { "epoch": 2.56, "grad_norm": 2.954329013824463, "learning_rate": 6.853103448275862e-07, "loss": 0.1929, "step": 260375 }, { "epoch": 2.56, "grad_norm": 10.637590408325195, "learning_rate": 6.848793103448276e-07, "loss": 0.0894, "step": 260400 }, { "epoch": 2.56, "grad_norm": 5.355125427246094, "learning_rate": 6.844482758620691e-07, "loss": 0.2048, "step": 260425 }, { "epoch": 2.56, "grad_norm": 10.81634521484375, "learning_rate": 6.840172413793105e-07, "loss": 0.0868, "step": 260450 }, { "epoch": 2.56, "grad_norm": 1.425285816192627, "learning_rate": 6.835862068965518e-07, "loss": 0.1646, "step": 260475 }, { "epoch": 2.56, "grad_norm": 11.864431381225586, "learning_rate": 6.831551724137932e-07, "loss": 0.088, "step": 260500 }, { "epoch": 2.56, "grad_norm": 2.9584031105041504, "learning_rate": 6.827413793103449e-07, "loss": 0.1529, "step": 260525 }, { "epoch": 2.56, "grad_norm": 14.288992881774902, "learning_rate": 6.823103448275863e-07, "loss": 0.0958, "step": 260550 }, { "epoch": 2.56, "grad_norm": 2.5849266052246094, "learning_rate": 6.818793103448276e-07, "loss": 0.1513, "step": 260575 }, { "epoch": 2.56, "grad_norm": 9.13752269744873, "learning_rate": 6.81448275862069e-07, "loss": 0.0896, "step": 260600 }, { "epoch": 2.56, "grad_norm": 6.391273021697998, "learning_rate": 6.810172413793103e-07, "loss": 0.1887, "step": 260625 }, { "epoch": 2.56, "grad_norm": 7.401609420776367, "learning_rate": 6.805862068965517e-07, "loss": 0.0847, "step": 260650 }, { "epoch": 2.56, "grad_norm": 5.425978183746338, "learning_rate": 6.801551724137933e-07, "loss": 0.1809, "step": 260675 }, { "epoch": 2.56, "grad_norm": 11.117874145507812, "learning_rate": 6.797241379310346e-07, "loss": 0.1071, "step": 260700 }, { "epoch": 2.56, "grad_norm": 4.182737350463867, "learning_rate": 6.79293103448276e-07, "loss": 0.2089, "step": 260725 }, { "epoch": 2.56, "grad_norm": 6.903774261474609, "learning_rate": 6.788620689655173e-07, "loss": 0.1082, "step": 260750 }, { "epoch": 2.56, "grad_norm": 2.055983543395996, "learning_rate": 6.784310344827587e-07, "loss": 0.1589, "step": 260775 }, { "epoch": 2.56, "grad_norm": 10.216694831848145, "learning_rate": 6.78e-07, "loss": 0.1069, "step": 260800 }, { "epoch": 2.56, "grad_norm": 0.511448860168457, "learning_rate": 6.775689655172414e-07, "loss": 0.1718, "step": 260825 }, { "epoch": 2.56, "grad_norm": 11.526544570922852, "learning_rate": 6.771379310344828e-07, "loss": 0.1025, "step": 260850 }, { "epoch": 2.56, "grad_norm": 5.000148296356201, "learning_rate": 6.767068965517242e-07, "loss": 0.1943, "step": 260875 }, { "epoch": 2.57, "grad_norm": 8.70453929901123, "learning_rate": 6.762758620689655e-07, "loss": 0.0887, "step": 260900 }, { "epoch": 2.57, "grad_norm": 6.737924098968506, "learning_rate": 6.75844827586207e-07, "loss": 0.1679, "step": 260925 }, { "epoch": 2.57, "grad_norm": 4.89450216293335, "learning_rate": 6.754137931034483e-07, "loss": 0.0712, "step": 260950 }, { "epoch": 2.57, "grad_norm": 0.04272085055708885, "learning_rate": 6.749827586206898e-07, "loss": 0.1876, "step": 260975 }, { "epoch": 2.57, "grad_norm": 7.5010786056518555, "learning_rate": 6.745517241379311e-07, "loss": 0.0733, "step": 261000 }, { "epoch": 2.57, "grad_norm": 2.5651371479034424, "learning_rate": 6.741206896551725e-07, "loss": 0.1596, "step": 261025 }, { "epoch": 2.57, "grad_norm": 6.103785037994385, "learning_rate": 6.736896551724138e-07, "loss": 0.1101, "step": 261050 }, { "epoch": 2.57, "grad_norm": 4.606994152069092, "learning_rate": 6.732586206896552e-07, "loss": 0.1552, "step": 261075 }, { "epoch": 2.57, "grad_norm": 31.98235321044922, "learning_rate": 6.728275862068965e-07, "loss": 0.108, "step": 261100 }, { "epoch": 2.57, "grad_norm": 4.80152702331543, "learning_rate": 6.723965517241379e-07, "loss": 0.1738, "step": 261125 }, { "epoch": 2.57, "grad_norm": 5.376169681549072, "learning_rate": 6.719655172413794e-07, "loss": 0.0865, "step": 261150 }, { "epoch": 2.57, "grad_norm": 4.178921699523926, "learning_rate": 6.715344827586208e-07, "loss": 0.1501, "step": 261175 }, { "epoch": 2.57, "grad_norm": 9.670741081237793, "learning_rate": 6.711034482758621e-07, "loss": 0.1287, "step": 261200 }, { "epoch": 2.57, "grad_norm": 2.5314648151397705, "learning_rate": 6.706724137931035e-07, "loss": 0.1606, "step": 261225 }, { "epoch": 2.57, "grad_norm": 12.057476997375488, "learning_rate": 6.702413793103448e-07, "loss": 0.1058, "step": 261250 }, { "epoch": 2.57, "grad_norm": 0.7747664451599121, "learning_rate": 6.698103448275862e-07, "loss": 0.1587, "step": 261275 }, { "epoch": 2.57, "grad_norm": 5.303198337554932, "learning_rate": 6.693793103448277e-07, "loss": 0.0751, "step": 261300 }, { "epoch": 2.57, "grad_norm": 5.2717437744140625, "learning_rate": 6.68948275862069e-07, "loss": 0.1796, "step": 261325 }, { "epoch": 2.57, "grad_norm": 8.819592475891113, "learning_rate": 6.685172413793104e-07, "loss": 0.1119, "step": 261350 }, { "epoch": 2.57, "grad_norm": 4.570291519165039, "learning_rate": 6.680862068965518e-07, "loss": 0.1491, "step": 261375 }, { "epoch": 2.57, "grad_norm": 7.686412334442139, "learning_rate": 6.676551724137932e-07, "loss": 0.0831, "step": 261400 }, { "epoch": 2.57, "grad_norm": 3.914363384246826, "learning_rate": 6.672241379310346e-07, "loss": 0.1734, "step": 261425 }, { "epoch": 2.57, "grad_norm": 12.915739059448242, "learning_rate": 6.66793103448276e-07, "loss": 0.0929, "step": 261450 }, { "epoch": 2.57, "grad_norm": 7.37044620513916, "learning_rate": 6.663620689655173e-07, "loss": 0.2064, "step": 261475 }, { "epoch": 2.57, "grad_norm": 4.9987030029296875, "learning_rate": 6.659310344827587e-07, "loss": 0.0993, "step": 261500 }, { "epoch": 2.57, "grad_norm": 2.588848352432251, "learning_rate": 6.655e-07, "loss": 0.1743, "step": 261525 }, { "epoch": 2.57, "grad_norm": 23.06988525390625, "learning_rate": 6.650689655172414e-07, "loss": 0.1157, "step": 261550 }, { "epoch": 2.57, "grad_norm": 2.578333616256714, "learning_rate": 6.646379310344827e-07, "loss": 0.1601, "step": 261575 }, { "epoch": 2.57, "grad_norm": 9.05195140838623, "learning_rate": 6.642068965517242e-07, "loss": 0.0848, "step": 261600 }, { "epoch": 2.57, "grad_norm": 4.11098051071167, "learning_rate": 6.637758620689656e-07, "loss": 0.177, "step": 261625 }, { "epoch": 2.57, "grad_norm": 7.566025257110596, "learning_rate": 6.63344827586207e-07, "loss": 0.1028, "step": 261650 }, { "epoch": 2.57, "grad_norm": 3.402874708175659, "learning_rate": 6.629137931034483e-07, "loss": 0.1468, "step": 261675 }, { "epoch": 2.57, "grad_norm": 11.904208183288574, "learning_rate": 6.624827586206897e-07, "loss": 0.0878, "step": 261700 }, { "epoch": 2.57, "grad_norm": 2.0983238220214844, "learning_rate": 6.62051724137931e-07, "loss": 0.1855, "step": 261725 }, { "epoch": 2.57, "grad_norm": 16.641952514648438, "learning_rate": 6.616206896551725e-07, "loss": 0.1113, "step": 261750 }, { "epoch": 2.57, "grad_norm": 7.558927059173584, "learning_rate": 6.611896551724138e-07, "loss": 0.1845, "step": 261775 }, { "epoch": 2.57, "grad_norm": 5.821640968322754, "learning_rate": 6.607586206896552e-07, "loss": 0.0691, "step": 261800 }, { "epoch": 2.57, "grad_norm": 3.110111713409424, "learning_rate": 6.603275862068965e-07, "loss": 0.1771, "step": 261825 }, { "epoch": 2.57, "grad_norm": 10.381390571594238, "learning_rate": 6.59896551724138e-07, "loss": 0.1062, "step": 261850 }, { "epoch": 2.57, "grad_norm": 2.62323260307312, "learning_rate": 6.594655172413795e-07, "loss": 0.1484, "step": 261875 }, { "epoch": 2.58, "grad_norm": 10.91920280456543, "learning_rate": 6.590344827586208e-07, "loss": 0.1254, "step": 261900 }, { "epoch": 2.58, "grad_norm": 2.141840934753418, "learning_rate": 6.586034482758622e-07, "loss": 0.1824, "step": 261925 }, { "epoch": 2.58, "grad_norm": 7.0089898109436035, "learning_rate": 6.581724137931035e-07, "loss": 0.0918, "step": 261950 }, { "epoch": 2.58, "grad_norm": 2.784552574157715, "learning_rate": 6.577413793103449e-07, "loss": 0.1949, "step": 261975 }, { "epoch": 2.58, "grad_norm": 9.445556640625, "learning_rate": 6.573103448275862e-07, "loss": 0.0949, "step": 262000 }, { "epoch": 2.58, "grad_norm": 4.495298862457275, "learning_rate": 6.568793103448276e-07, "loss": 0.2005, "step": 262025 }, { "epoch": 2.58, "grad_norm": 10.905289649963379, "learning_rate": 6.564482758620689e-07, "loss": 0.0897, "step": 262050 }, { "epoch": 2.58, "grad_norm": 7.779533863067627, "learning_rate": 6.560172413793104e-07, "loss": 0.1931, "step": 262075 }, { "epoch": 2.58, "grad_norm": 8.75224494934082, "learning_rate": 6.555862068965518e-07, "loss": 0.0614, "step": 262100 }, { "epoch": 2.58, "grad_norm": 2.843932867050171, "learning_rate": 6.551551724137932e-07, "loss": 0.1918, "step": 262125 }, { "epoch": 2.58, "grad_norm": 10.303285598754883, "learning_rate": 6.547241379310345e-07, "loss": 0.0708, "step": 262150 }, { "epoch": 2.58, "grad_norm": 3.3112051486968994, "learning_rate": 6.542931034482759e-07, "loss": 0.1662, "step": 262175 }, { "epoch": 2.58, "grad_norm": 11.171133995056152, "learning_rate": 6.538620689655173e-07, "loss": 0.1174, "step": 262200 }, { "epoch": 2.58, "grad_norm": 3.5971415042877197, "learning_rate": 6.534310344827587e-07, "loss": 0.1869, "step": 262225 }, { "epoch": 2.58, "grad_norm": 6.521094799041748, "learning_rate": 6.53e-07, "loss": 0.0995, "step": 262250 }, { "epoch": 2.58, "grad_norm": 1.465580940246582, "learning_rate": 6.525689655172414e-07, "loss": 0.1545, "step": 262275 }, { "epoch": 2.58, "grad_norm": 11.75094223022461, "learning_rate": 6.521379310344827e-07, "loss": 0.0736, "step": 262300 }, { "epoch": 2.58, "grad_norm": 4.2532830238342285, "learning_rate": 6.517068965517243e-07, "loss": 0.1971, "step": 262325 }, { "epoch": 2.58, "grad_norm": 7.549051284790039, "learning_rate": 6.512758620689657e-07, "loss": 0.0857, "step": 262350 }, { "epoch": 2.58, "grad_norm": 4.732475280761719, "learning_rate": 6.50844827586207e-07, "loss": 0.191, "step": 262375 }, { "epoch": 2.58, "grad_norm": 7.365817070007324, "learning_rate": 6.504137931034484e-07, "loss": 0.0674, "step": 262400 }, { "epoch": 2.58, "grad_norm": 0.9097816944122314, "learning_rate": 6.499827586206897e-07, "loss": 0.151, "step": 262425 }, { "epoch": 2.58, "grad_norm": 10.96088695526123, "learning_rate": 6.495517241379311e-07, "loss": 0.1048, "step": 262450 }, { "epoch": 2.58, "grad_norm": 4.094757556915283, "learning_rate": 6.491206896551724e-07, "loss": 0.1591, "step": 262475 }, { "epoch": 2.58, "grad_norm": 12.083905220031738, "learning_rate": 6.486896551724138e-07, "loss": 0.0878, "step": 262500 }, { "epoch": 2.58, "grad_norm": 4.307653903961182, "learning_rate": 6.482586206896552e-07, "loss": 0.1816, "step": 262525 }, { "epoch": 2.58, "grad_norm": 6.399886608123779, "learning_rate": 6.478275862068966e-07, "loss": 0.1243, "step": 262550 }, { "epoch": 2.58, "grad_norm": 0.3678426742553711, "learning_rate": 6.47396551724138e-07, "loss": 0.1271, "step": 262575 }, { "epoch": 2.58, "grad_norm": 10.57888412475586, "learning_rate": 6.469655172413794e-07, "loss": 0.1119, "step": 262600 }, { "epoch": 2.58, "grad_norm": 8.3126859664917, "learning_rate": 6.465344827586207e-07, "loss": 0.1749, "step": 262625 }, { "epoch": 2.58, "grad_norm": 6.514589309692383, "learning_rate": 6.461034482758622e-07, "loss": 0.0907, "step": 262650 }, { "epoch": 2.58, "grad_norm": 4.503445625305176, "learning_rate": 6.456724137931035e-07, "loss": 0.1687, "step": 262675 }, { "epoch": 2.58, "grad_norm": 12.449435234069824, "learning_rate": 6.452413793103449e-07, "loss": 0.0934, "step": 262700 }, { "epoch": 2.58, "grad_norm": 5.063565731048584, "learning_rate": 6.448103448275862e-07, "loss": 0.2102, "step": 262725 }, { "epoch": 2.58, "grad_norm": 19.034399032592773, "learning_rate": 6.443793103448276e-07, "loss": 0.1055, "step": 262750 }, { "epoch": 2.58, "grad_norm": 4.513984203338623, "learning_rate": 6.439482758620689e-07, "loss": 0.1814, "step": 262775 }, { "epoch": 2.58, "grad_norm": 13.548857688903809, "learning_rate": 6.435172413793105e-07, "loss": 0.1005, "step": 262800 }, { "epoch": 2.58, "grad_norm": 3.6987383365631104, "learning_rate": 6.430862068965518e-07, "loss": 0.186, "step": 262825 }, { "epoch": 2.58, "grad_norm": 19.6373291015625, "learning_rate": 6.426551724137932e-07, "loss": 0.1198, "step": 262850 }, { "epoch": 2.58, "grad_norm": 3.5826621055603027, "learning_rate": 6.422241379310345e-07, "loss": 0.168, "step": 262875 }, { "epoch": 2.58, "grad_norm": 7.995766639709473, "learning_rate": 6.417931034482759e-07, "loss": 0.0807, "step": 262900 }, { "epoch": 2.59, "grad_norm": 5.023298740386963, "learning_rate": 6.413620689655172e-07, "loss": 0.1986, "step": 262925 }, { "epoch": 2.59, "grad_norm": 5.42043399810791, "learning_rate": 6.409310344827586e-07, "loss": 0.0804, "step": 262950 }, { "epoch": 2.59, "grad_norm": 3.3241612911224365, "learning_rate": 6.405e-07, "loss": 0.1793, "step": 262975 }, { "epoch": 2.59, "grad_norm": 15.738428115844727, "learning_rate": 6.400689655172414e-07, "loss": 0.1021, "step": 263000 }, { "epoch": 2.59, "grad_norm": 4.657568454742432, "learning_rate": 6.396551724137931e-07, "loss": 0.1874, "step": 263025 }, { "epoch": 2.59, "grad_norm": 6.775152206420898, "learning_rate": 6.392241379310344e-07, "loss": 0.095, "step": 263050 }, { "epoch": 2.59, "grad_norm": 4.295499324798584, "learning_rate": 6.38793103448276e-07, "loss": 0.1699, "step": 263075 }, { "epoch": 2.59, "grad_norm": 12.773590087890625, "learning_rate": 6.383620689655173e-07, "loss": 0.0936, "step": 263100 }, { "epoch": 2.59, "grad_norm": 5.615166187286377, "learning_rate": 6.379310344827587e-07, "loss": 0.2077, "step": 263125 }, { "epoch": 2.59, "grad_norm": 10.903881072998047, "learning_rate": 6.375e-07, "loss": 0.1037, "step": 263150 }, { "epoch": 2.59, "grad_norm": 1.7424699068069458, "learning_rate": 6.370689655172414e-07, "loss": 0.1764, "step": 263175 }, { "epoch": 2.59, "grad_norm": 9.712655067443848, "learning_rate": 6.366379310344828e-07, "loss": 0.0996, "step": 263200 }, { "epoch": 2.59, "grad_norm": 4.781904220581055, "learning_rate": 6.362068965517242e-07, "loss": 0.17, "step": 263225 }, { "epoch": 2.59, "grad_norm": 18.136943817138672, "learning_rate": 6.357758620689655e-07, "loss": 0.1143, "step": 263250 }, { "epoch": 2.59, "grad_norm": 4.147569179534912, "learning_rate": 6.353448275862069e-07, "loss": 0.1886, "step": 263275 }, { "epoch": 2.59, "grad_norm": 10.10344409942627, "learning_rate": 6.349137931034484e-07, "loss": 0.088, "step": 263300 }, { "epoch": 2.59, "grad_norm": 2.8450450897216797, "learning_rate": 6.344827586206897e-07, "loss": 0.1665, "step": 263325 }, { "epoch": 2.59, "grad_norm": 4.860743999481201, "learning_rate": 6.340517241379312e-07, "loss": 0.1007, "step": 263350 }, { "epoch": 2.59, "grad_norm": 5.351719856262207, "learning_rate": 6.336206896551725e-07, "loss": 0.1598, "step": 263375 }, { "epoch": 2.59, "grad_norm": 7.5707292556762695, "learning_rate": 6.331896551724139e-07, "loss": 0.0844, "step": 263400 }, { "epoch": 2.59, "grad_norm": 2.409740447998047, "learning_rate": 6.327586206896552e-07, "loss": 0.1612, "step": 263425 }, { "epoch": 2.59, "grad_norm": 5.080662250518799, "learning_rate": 6.323275862068966e-07, "loss": 0.0953, "step": 263450 }, { "epoch": 2.59, "grad_norm": 3.2797915935516357, "learning_rate": 6.318965517241379e-07, "loss": 0.1716, "step": 263475 }, { "epoch": 2.59, "grad_norm": 11.288725852966309, "learning_rate": 6.314655172413793e-07, "loss": 0.1138, "step": 263500 }, { "epoch": 2.59, "grad_norm": 8.138431549072266, "learning_rate": 6.310344827586208e-07, "loss": 0.1829, "step": 263525 }, { "epoch": 2.59, "grad_norm": 11.354215621948242, "learning_rate": 6.306034482758622e-07, "loss": 0.1018, "step": 263550 }, { "epoch": 2.59, "grad_norm": 7.535511493682861, "learning_rate": 6.301724137931035e-07, "loss": 0.16, "step": 263575 }, { "epoch": 2.59, "grad_norm": 8.147040367126465, "learning_rate": 6.297413793103449e-07, "loss": 0.0849, "step": 263600 }, { "epoch": 2.59, "grad_norm": 3.5204715728759766, "learning_rate": 6.293103448275862e-07, "loss": 0.1626, "step": 263625 }, { "epoch": 2.59, "grad_norm": 10.87822437286377, "learning_rate": 6.288793103448277e-07, "loss": 0.1, "step": 263650 }, { "epoch": 2.59, "grad_norm": 2.3140742778778076, "learning_rate": 6.28448275862069e-07, "loss": 0.1705, "step": 263675 }, { "epoch": 2.59, "grad_norm": 11.139158248901367, "learning_rate": 6.280172413793104e-07, "loss": 0.1142, "step": 263700 }, { "epoch": 2.59, "grad_norm": 1.821437120437622, "learning_rate": 6.275862068965517e-07, "loss": 0.2063, "step": 263725 }, { "epoch": 2.59, "grad_norm": 12.996715545654297, "learning_rate": 6.271551724137931e-07, "loss": 0.1083, "step": 263750 }, { "epoch": 2.59, "grad_norm": 6.303211212158203, "learning_rate": 6.267241379310345e-07, "loss": 0.1814, "step": 263775 }, { "epoch": 2.59, "grad_norm": 12.092199325561523, "learning_rate": 6.26293103448276e-07, "loss": 0.1267, "step": 263800 }, { "epoch": 2.59, "grad_norm": 3.559892416000366, "learning_rate": 6.258620689655173e-07, "loss": 0.1564, "step": 263825 }, { "epoch": 2.59, "grad_norm": 9.267663955688477, "learning_rate": 6.254310344827587e-07, "loss": 0.0945, "step": 263850 }, { "epoch": 2.59, "grad_norm": 1.5509254932403564, "learning_rate": 6.25e-07, "loss": 0.154, "step": 263875 }, { "epoch": 2.59, "grad_norm": 6.921869277954102, "learning_rate": 6.245689655172414e-07, "loss": 0.091, "step": 263900 }, { "epoch": 2.59, "grad_norm": 4.296475410461426, "learning_rate": 6.241379310344828e-07, "loss": 0.1983, "step": 263925 }, { "epoch": 2.6, "grad_norm": 12.608116149902344, "learning_rate": 6.237068965517242e-07, "loss": 0.0814, "step": 263950 }, { "epoch": 2.6, "grad_norm": 5.849074840545654, "learning_rate": 6.232758620689656e-07, "loss": 0.1513, "step": 263975 }, { "epoch": 2.6, "grad_norm": 12.272058486938477, "learning_rate": 6.228448275862069e-07, "loss": 0.083, "step": 264000 }, { "epoch": 2.6, "grad_norm": 0.7938090562820435, "learning_rate": 6.224137931034483e-07, "loss": 0.1775, "step": 264025 }, { "epoch": 2.6, "grad_norm": 14.39319133758545, "learning_rate": 6.219827586206897e-07, "loss": 0.102, "step": 264050 }, { "epoch": 2.6, "grad_norm": 4.526266574859619, "learning_rate": 6.215517241379311e-07, "loss": 0.1743, "step": 264075 }, { "epoch": 2.6, "grad_norm": 10.410229682922363, "learning_rate": 6.211206896551724e-07, "loss": 0.0845, "step": 264100 }, { "epoch": 2.6, "grad_norm": 2.4744346141815186, "learning_rate": 6.206896551724139e-07, "loss": 0.1629, "step": 264125 }, { "epoch": 2.6, "grad_norm": 13.21274185180664, "learning_rate": 6.202586206896552e-07, "loss": 0.0984, "step": 264150 }, { "epoch": 2.6, "grad_norm": 5.980197906494141, "learning_rate": 6.198275862068967e-07, "loss": 0.1768, "step": 264175 }, { "epoch": 2.6, "grad_norm": 11.839648246765137, "learning_rate": 6.19396551724138e-07, "loss": 0.1012, "step": 264200 }, { "epoch": 2.6, "grad_norm": 2.897521734237671, "learning_rate": 6.189655172413794e-07, "loss": 0.1816, "step": 264225 }, { "epoch": 2.6, "grad_norm": 5.842688083648682, "learning_rate": 6.185344827586207e-07, "loss": 0.091, "step": 264250 }, { "epoch": 2.6, "grad_norm": 3.667379856109619, "learning_rate": 6.181034482758621e-07, "loss": 0.1908, "step": 264275 }, { "epoch": 2.6, "grad_norm": 10.359471321105957, "learning_rate": 6.176724137931035e-07, "loss": 0.0624, "step": 264300 }, { "epoch": 2.6, "grad_norm": 6.224223613739014, "learning_rate": 6.172413793103449e-07, "loss": 0.2143, "step": 264325 }, { "epoch": 2.6, "grad_norm": 12.738018035888672, "learning_rate": 6.168103448275862e-07, "loss": 0.106, "step": 264350 }, { "epoch": 2.6, "grad_norm": 2.4979679584503174, "learning_rate": 6.163793103448276e-07, "loss": 0.182, "step": 264375 }, { "epoch": 2.6, "grad_norm": 9.040096282958984, "learning_rate": 6.159482758620689e-07, "loss": 0.0808, "step": 264400 }, { "epoch": 2.6, "grad_norm": 3.218620777130127, "learning_rate": 6.155172413793104e-07, "loss": 0.2055, "step": 264425 }, { "epoch": 2.6, "grad_norm": 7.787283420562744, "learning_rate": 6.150862068965517e-07, "loss": 0.1089, "step": 264450 }, { "epoch": 2.6, "grad_norm": 0.9623292088508606, "learning_rate": 6.146551724137931e-07, "loss": 0.1899, "step": 264475 }, { "epoch": 2.6, "grad_norm": 8.050349235534668, "learning_rate": 6.142241379310346e-07, "loss": 0.0962, "step": 264500 }, { "epoch": 2.6, "grad_norm": 3.873450517654419, "learning_rate": 6.137931034482759e-07, "loss": 0.1725, "step": 264525 }, { "epoch": 2.6, "grad_norm": 20.745954513549805, "learning_rate": 6.133620689655173e-07, "loss": 0.1302, "step": 264550 }, { "epoch": 2.6, "grad_norm": 3.632242441177368, "learning_rate": 6.129310344827587e-07, "loss": 0.1667, "step": 264575 }, { "epoch": 2.6, "grad_norm": 10.600410461425781, "learning_rate": 6.125000000000001e-07, "loss": 0.0839, "step": 264600 }, { "epoch": 2.6, "grad_norm": 3.5731358528137207, "learning_rate": 6.120689655172414e-07, "loss": 0.1866, "step": 264625 }, { "epoch": 2.6, "grad_norm": 5.829722881317139, "learning_rate": 6.116379310344829e-07, "loss": 0.0793, "step": 264650 }, { "epoch": 2.6, "grad_norm": 6.092685222625732, "learning_rate": 6.112068965517242e-07, "loss": 0.1482, "step": 264675 }, { "epoch": 2.6, "grad_norm": 12.191843032836914, "learning_rate": 6.107758620689656e-07, "loss": 0.0939, "step": 264700 }, { "epoch": 2.6, "grad_norm": 4.569920063018799, "learning_rate": 6.103448275862069e-07, "loss": 0.1613, "step": 264725 }, { "epoch": 2.6, "grad_norm": 10.625999450683594, "learning_rate": 6.099137931034483e-07, "loss": 0.1008, "step": 264750 }, { "epoch": 2.6, "grad_norm": 8.575743675231934, "learning_rate": 6.094827586206897e-07, "loss": 0.1505, "step": 264775 }, { "epoch": 2.6, "grad_norm": 16.55060577392578, "learning_rate": 6.090517241379311e-07, "loss": 0.1191, "step": 264800 }, { "epoch": 2.6, "grad_norm": 7.0615363121032715, "learning_rate": 6.086206896551724e-07, "loss": 0.1809, "step": 264825 }, { "epoch": 2.6, "grad_norm": 6.699952602386475, "learning_rate": 6.081896551724138e-07, "loss": 0.1027, "step": 264850 }, { "epoch": 2.6, "grad_norm": 8.753190994262695, "learning_rate": 6.077586206896552e-07, "loss": 0.1822, "step": 264875 }, { "epoch": 2.6, "grad_norm": 10.314652442932129, "learning_rate": 6.073275862068966e-07, "loss": 0.0954, "step": 264900 }, { "epoch": 2.6, "grad_norm": 1.2666223049163818, "learning_rate": 6.068965517241379e-07, "loss": 0.2084, "step": 264925 }, { "epoch": 2.61, "grad_norm": 10.830958366394043, "learning_rate": 6.064655172413794e-07, "loss": 0.1031, "step": 264950 }, { "epoch": 2.61, "grad_norm": 4.878904342651367, "learning_rate": 6.060344827586207e-07, "loss": 0.1674, "step": 264975 }, { "epoch": 2.61, "grad_norm": 16.177276611328125, "learning_rate": 6.056034482758621e-07, "loss": 0.0802, "step": 265000 }, { "epoch": 2.61, "grad_norm": 1.3508738279342651, "learning_rate": 6.051896551724139e-07, "loss": 0.1624, "step": 265025 }, { "epoch": 2.61, "grad_norm": 14.815789222717285, "learning_rate": 6.047586206896552e-07, "loss": 0.0957, "step": 265050 }, { "epoch": 2.61, "grad_norm": 3.77820086479187, "learning_rate": 6.043275862068966e-07, "loss": 0.176, "step": 265075 }, { "epoch": 2.61, "grad_norm": 10.415873527526855, "learning_rate": 6.038965517241379e-07, "loss": 0.1243, "step": 265100 }, { "epoch": 2.61, "grad_norm": 2.7396106719970703, "learning_rate": 6.034655172413794e-07, "loss": 0.1903, "step": 265125 }, { "epoch": 2.61, "grad_norm": 5.869659423828125, "learning_rate": 6.030344827586207e-07, "loss": 0.1045, "step": 265150 }, { "epoch": 2.61, "grad_norm": 2.941333770751953, "learning_rate": 6.026034482758621e-07, "loss": 0.1714, "step": 265175 }, { "epoch": 2.61, "grad_norm": 6.574490070343018, "learning_rate": 6.021724137931035e-07, "loss": 0.0883, "step": 265200 }, { "epoch": 2.61, "grad_norm": 6.609555244445801, "learning_rate": 6.017413793103449e-07, "loss": 0.1541, "step": 265225 }, { "epoch": 2.61, "grad_norm": 5.307516098022461, "learning_rate": 6.013103448275862e-07, "loss": 0.103, "step": 265250 }, { "epoch": 2.61, "grad_norm": 0.16136199235916138, "learning_rate": 6.008793103448277e-07, "loss": 0.1615, "step": 265275 }, { "epoch": 2.61, "grad_norm": 6.724847316741943, "learning_rate": 6.00448275862069e-07, "loss": 0.0815, "step": 265300 }, { "epoch": 2.61, "grad_norm": 9.666382789611816, "learning_rate": 6.000172413793104e-07, "loss": 0.128, "step": 265325 }, { "epoch": 2.61, "grad_norm": 7.989638805389404, "learning_rate": 5.995862068965517e-07, "loss": 0.0962, "step": 265350 }, { "epoch": 2.61, "grad_norm": 4.339362621307373, "learning_rate": 5.991551724137931e-07, "loss": 0.1715, "step": 265375 }, { "epoch": 2.61, "grad_norm": 4.045022010803223, "learning_rate": 5.987241379310346e-07, "loss": 0.0891, "step": 265400 }, { "epoch": 2.61, "grad_norm": 6.877242565155029, "learning_rate": 5.982931034482759e-07, "loss": 0.1829, "step": 265425 }, { "epoch": 2.61, "grad_norm": 8.123942375183105, "learning_rate": 5.978620689655173e-07, "loss": 0.073, "step": 265450 }, { "epoch": 2.61, "grad_norm": 1.8850524425506592, "learning_rate": 5.974310344827586e-07, "loss": 0.1756, "step": 265475 }, { "epoch": 2.61, "grad_norm": 9.159110069274902, "learning_rate": 5.970000000000001e-07, "loss": 0.0748, "step": 265500 }, { "epoch": 2.61, "grad_norm": 4.2753472328186035, "learning_rate": 5.965689655172414e-07, "loss": 0.1536, "step": 265525 }, { "epoch": 2.61, "grad_norm": 17.724733352661133, "learning_rate": 5.961379310344828e-07, "loss": 0.1238, "step": 265550 }, { "epoch": 2.61, "grad_norm": 5.0737080574035645, "learning_rate": 5.957068965517242e-07, "loss": 0.1854, "step": 265575 }, { "epoch": 2.61, "grad_norm": 16.1152286529541, "learning_rate": 5.952758620689656e-07, "loss": 0.087, "step": 265600 }, { "epoch": 2.61, "grad_norm": 3.118333339691162, "learning_rate": 5.948448275862069e-07, "loss": 0.1826, "step": 265625 }, { "epoch": 2.61, "grad_norm": 10.973746299743652, "learning_rate": 5.944137931034484e-07, "loss": 0.1187, "step": 265650 }, { "epoch": 2.61, "grad_norm": 3.3305702209472656, "learning_rate": 5.939827586206897e-07, "loss": 0.1382, "step": 265675 }, { "epoch": 2.61, "grad_norm": 19.449155807495117, "learning_rate": 5.935517241379311e-07, "loss": 0.1072, "step": 265700 }, { "epoch": 2.61, "grad_norm": 6.263996601104736, "learning_rate": 5.931206896551724e-07, "loss": 0.1679, "step": 265725 }, { "epoch": 2.61, "grad_norm": 9.89710521697998, "learning_rate": 5.926896551724139e-07, "loss": 0.0749, "step": 265750 }, { "epoch": 2.61, "grad_norm": 5.721589088439941, "learning_rate": 5.922586206896552e-07, "loss": 0.1638, "step": 265775 }, { "epoch": 2.61, "grad_norm": 6.452892303466797, "learning_rate": 5.918275862068966e-07, "loss": 0.0954, "step": 265800 }, { "epoch": 2.61, "grad_norm": 4.132918357849121, "learning_rate": 5.913965517241379e-07, "loss": 0.1473, "step": 265825 }, { "epoch": 2.61, "grad_norm": 6.430087566375732, "learning_rate": 5.909655172413794e-07, "loss": 0.0764, "step": 265850 }, { "epoch": 2.61, "grad_norm": 7.041200160980225, "learning_rate": 5.905344827586207e-07, "loss": 0.1804, "step": 265875 }, { "epoch": 2.61, "grad_norm": 10.254203796386719, "learning_rate": 5.901034482758621e-07, "loss": 0.0947, "step": 265900 }, { "epoch": 2.61, "grad_norm": 3.268585681915283, "learning_rate": 5.896724137931034e-07, "loss": 0.1606, "step": 265925 }, { "epoch": 2.61, "grad_norm": 6.5101118087768555, "learning_rate": 5.892413793103448e-07, "loss": 0.0819, "step": 265950 }, { "epoch": 2.62, "grad_norm": 4.19061803817749, "learning_rate": 5.888103448275863e-07, "loss": 0.1648, "step": 265975 }, { "epoch": 2.62, "grad_norm": 10.545709609985352, "learning_rate": 5.883793103448276e-07, "loss": 0.1085, "step": 266000 }, { "epoch": 2.62, "grad_norm": 3.6783945560455322, "learning_rate": 5.879482758620691e-07, "loss": 0.1732, "step": 266025 }, { "epoch": 2.62, "grad_norm": 10.319551467895508, "learning_rate": 5.875172413793104e-07, "loss": 0.0938, "step": 266050 }, { "epoch": 2.62, "grad_norm": 6.0071282386779785, "learning_rate": 5.870862068965518e-07, "loss": 0.1836, "step": 266075 }, { "epoch": 2.62, "grad_norm": 12.024741172790527, "learning_rate": 5.866551724137932e-07, "loss": 0.0928, "step": 266100 }, { "epoch": 2.62, "grad_norm": 5.068450927734375, "learning_rate": 5.862241379310346e-07, "loss": 0.1777, "step": 266125 }, { "epoch": 2.62, "grad_norm": 6.675484657287598, "learning_rate": 5.857931034482759e-07, "loss": 0.1058, "step": 266150 }, { "epoch": 2.62, "grad_norm": 4.655085563659668, "learning_rate": 5.853620689655173e-07, "loss": 0.1828, "step": 266175 }, { "epoch": 2.62, "grad_norm": 10.141356468200684, "learning_rate": 5.849310344827587e-07, "loss": 0.0922, "step": 266200 }, { "epoch": 2.62, "grad_norm": 4.632564067840576, "learning_rate": 5.845000000000001e-07, "loss": 0.1564, "step": 266225 }, { "epoch": 2.62, "grad_norm": 6.809584140777588, "learning_rate": 5.840689655172414e-07, "loss": 0.0858, "step": 266250 }, { "epoch": 2.62, "grad_norm": 7.830529689788818, "learning_rate": 5.836379310344828e-07, "loss": 0.1678, "step": 266275 }, { "epoch": 2.62, "grad_norm": 9.271125793457031, "learning_rate": 5.832068965517241e-07, "loss": 0.0744, "step": 266300 }, { "epoch": 2.62, "grad_norm": 5.722837924957275, "learning_rate": 5.827758620689656e-07, "loss": 0.1405, "step": 266325 }, { "epoch": 2.62, "grad_norm": 7.901597023010254, "learning_rate": 5.823448275862069e-07, "loss": 0.1026, "step": 266350 }, { "epoch": 2.62, "grad_norm": 3.0333597660064697, "learning_rate": 5.819137931034483e-07, "loss": 0.1712, "step": 266375 }, { "epoch": 2.62, "grad_norm": 15.434218406677246, "learning_rate": 5.814827586206896e-07, "loss": 0.0954, "step": 266400 }, { "epoch": 2.62, "grad_norm": 6.038172245025635, "learning_rate": 5.810517241379311e-07, "loss": 0.1726, "step": 266425 }, { "epoch": 2.62, "grad_norm": 10.541999816894531, "learning_rate": 5.806206896551724e-07, "loss": 0.0681, "step": 266450 }, { "epoch": 2.62, "grad_norm": 2.9058024883270264, "learning_rate": 5.801896551724139e-07, "loss": 0.1445, "step": 266475 }, { "epoch": 2.62, "grad_norm": 8.863404273986816, "learning_rate": 5.797586206896553e-07, "loss": 0.0859, "step": 266500 }, { "epoch": 2.62, "grad_norm": 4.251485824584961, "learning_rate": 5.793275862068966e-07, "loss": 0.1761, "step": 266525 }, { "epoch": 2.62, "grad_norm": 8.516484260559082, "learning_rate": 5.78896551724138e-07, "loss": 0.0997, "step": 266550 }, { "epoch": 2.62, "grad_norm": 5.6417012214660645, "learning_rate": 5.784655172413794e-07, "loss": 0.1751, "step": 266575 }, { "epoch": 2.62, "grad_norm": 10.676694869995117, "learning_rate": 5.780344827586208e-07, "loss": 0.0828, "step": 266600 }, { "epoch": 2.62, "grad_norm": 1.4138067960739136, "learning_rate": 5.776034482758621e-07, "loss": 0.1949, "step": 266625 }, { "epoch": 2.62, "grad_norm": 10.709589004516602, "learning_rate": 5.771724137931035e-07, "loss": 0.0832, "step": 266650 }, { "epoch": 2.62, "grad_norm": 7.895551681518555, "learning_rate": 5.767413793103449e-07, "loss": 0.1275, "step": 266675 }, { "epoch": 2.62, "grad_norm": 12.781943321228027, "learning_rate": 5.763103448275863e-07, "loss": 0.097, "step": 266700 }, { "epoch": 2.62, "grad_norm": 6.505599498748779, "learning_rate": 5.758793103448276e-07, "loss": 0.1479, "step": 266725 }, { "epoch": 2.62, "grad_norm": 13.134203910827637, "learning_rate": 5.75448275862069e-07, "loss": 0.082, "step": 266750 }, { "epoch": 2.62, "grad_norm": 8.571136474609375, "learning_rate": 5.750172413793103e-07, "loss": 0.1931, "step": 266775 }, { "epoch": 2.62, "grad_norm": 6.633064270019531, "learning_rate": 5.745862068965518e-07, "loss": 0.0634, "step": 266800 }, { "epoch": 2.62, "grad_norm": 2.9002797603607178, "learning_rate": 5.741551724137931e-07, "loss": 0.1377, "step": 266825 }, { "epoch": 2.62, "grad_norm": 12.718097686767578, "learning_rate": 5.737241379310345e-07, "loss": 0.1105, "step": 266850 }, { "epoch": 2.62, "grad_norm": 3.438603401184082, "learning_rate": 5.732931034482759e-07, "loss": 0.1866, "step": 266875 }, { "epoch": 2.62, "grad_norm": 7.295401573181152, "learning_rate": 5.728620689655173e-07, "loss": 0.0893, "step": 266900 }, { "epoch": 2.62, "grad_norm": 3.5251874923706055, "learning_rate": 5.724310344827587e-07, "loss": 0.1566, "step": 266925 }, { "epoch": 2.62, "grad_norm": 11.314946174621582, "learning_rate": 5.720000000000001e-07, "loss": 0.0806, "step": 266950 }, { "epoch": 2.62, "grad_norm": 2.889420986175537, "learning_rate": 5.715689655172414e-07, "loss": 0.1455, "step": 266975 }, { "epoch": 2.63, "grad_norm": 6.614494800567627, "learning_rate": 5.711379310344828e-07, "loss": 0.0801, "step": 267000 }, { "epoch": 2.63, "grad_norm": 1.0102295875549316, "learning_rate": 5.707068965517243e-07, "loss": 0.1434, "step": 267025 }, { "epoch": 2.63, "grad_norm": 12.815929412841797, "learning_rate": 5.702758620689656e-07, "loss": 0.0818, "step": 267050 }, { "epoch": 2.63, "grad_norm": 2.5995612144470215, "learning_rate": 5.69844827586207e-07, "loss": 0.1681, "step": 267075 }, { "epoch": 2.63, "grad_norm": 10.128079414367676, "learning_rate": 5.694137931034483e-07, "loss": 0.0854, "step": 267100 }, { "epoch": 2.63, "grad_norm": 2.85608172416687, "learning_rate": 5.689827586206897e-07, "loss": 0.157, "step": 267125 }, { "epoch": 2.63, "grad_norm": 14.182982444763184, "learning_rate": 5.685517241379311e-07, "loss": 0.1177, "step": 267150 }, { "epoch": 2.63, "grad_norm": 5.525214672088623, "learning_rate": 5.681206896551725e-07, "loss": 0.13, "step": 267175 }, { "epoch": 2.63, "grad_norm": 7.883551597595215, "learning_rate": 5.676896551724138e-07, "loss": 0.0923, "step": 267200 }, { "epoch": 2.63, "grad_norm": 2.135739803314209, "learning_rate": 5.672758620689656e-07, "loss": 0.1952, "step": 267225 }, { "epoch": 2.63, "grad_norm": 8.34182071685791, "learning_rate": 5.668448275862069e-07, "loss": 0.1236, "step": 267250 }, { "epoch": 2.63, "grad_norm": 1.909991979598999, "learning_rate": 5.664137931034483e-07, "loss": 0.1864, "step": 267275 }, { "epoch": 2.63, "grad_norm": 6.400166034698486, "learning_rate": 5.659827586206897e-07, "loss": 0.116, "step": 267300 }, { "epoch": 2.63, "grad_norm": 5.071900844573975, "learning_rate": 5.655517241379311e-07, "loss": 0.1665, "step": 267325 }, { "epoch": 2.63, "grad_norm": 9.592985153198242, "learning_rate": 5.651206896551724e-07, "loss": 0.0919, "step": 267350 }, { "epoch": 2.63, "grad_norm": 6.148308277130127, "learning_rate": 5.646896551724138e-07, "loss": 0.1482, "step": 267375 }, { "epoch": 2.63, "grad_norm": 12.968257904052734, "learning_rate": 5.642586206896551e-07, "loss": 0.1071, "step": 267400 }, { "epoch": 2.63, "grad_norm": 6.258072853088379, "learning_rate": 5.638275862068966e-07, "loss": 0.151, "step": 267425 }, { "epoch": 2.63, "grad_norm": 6.458671569824219, "learning_rate": 5.63396551724138e-07, "loss": 0.0985, "step": 267450 }, { "epoch": 2.63, "grad_norm": 8.9569673538208, "learning_rate": 5.629655172413793e-07, "loss": 0.2073, "step": 267475 }, { "epoch": 2.63, "grad_norm": 9.659818649291992, "learning_rate": 5.625344827586208e-07, "loss": 0.1039, "step": 267500 }, { "epoch": 2.63, "grad_norm": 3.252002239227295, "learning_rate": 5.621034482758621e-07, "loss": 0.1347, "step": 267525 }, { "epoch": 2.63, "grad_norm": 9.543902397155762, "learning_rate": 5.616724137931035e-07, "loss": 0.0952, "step": 267550 }, { "epoch": 2.63, "grad_norm": 4.375005722045898, "learning_rate": 5.612413793103449e-07, "loss": 0.2055, "step": 267575 }, { "epoch": 2.63, "grad_norm": 12.217254638671875, "learning_rate": 5.608103448275863e-07, "loss": 0.1049, "step": 267600 }, { "epoch": 2.63, "grad_norm": 6.312933921813965, "learning_rate": 5.603793103448276e-07, "loss": 0.1925, "step": 267625 }, { "epoch": 2.63, "grad_norm": 9.97873592376709, "learning_rate": 5.599482758620691e-07, "loss": 0.0882, "step": 267650 }, { "epoch": 2.63, "grad_norm": 6.836507320404053, "learning_rate": 5.595172413793104e-07, "loss": 0.194, "step": 267675 }, { "epoch": 2.63, "grad_norm": 11.687007904052734, "learning_rate": 5.590862068965518e-07, "loss": 0.1087, "step": 267700 }, { "epoch": 2.63, "grad_norm": 5.236645221710205, "learning_rate": 5.586551724137931e-07, "loss": 0.2002, "step": 267725 }, { "epoch": 2.63, "grad_norm": 7.330352306365967, "learning_rate": 5.582241379310345e-07, "loss": 0.0929, "step": 267750 }, { "epoch": 2.63, "grad_norm": 4.59222412109375, "learning_rate": 5.577931034482759e-07, "loss": 0.1848, "step": 267775 }, { "epoch": 2.63, "grad_norm": 7.5407280921936035, "learning_rate": 5.573620689655173e-07, "loss": 0.0754, "step": 267800 }, { "epoch": 2.63, "grad_norm": 2.407191753387451, "learning_rate": 5.569310344827586e-07, "loss": 0.1911, "step": 267825 }, { "epoch": 2.63, "grad_norm": 7.718364715576172, "learning_rate": 5.565e-07, "loss": 0.1022, "step": 267850 }, { "epoch": 2.63, "grad_norm": 4.423686981201172, "learning_rate": 5.560689655172414e-07, "loss": 0.1465, "step": 267875 }, { "epoch": 2.63, "grad_norm": 13.15058422088623, "learning_rate": 5.556379310344828e-07, "loss": 0.0966, "step": 267900 }, { "epoch": 2.63, "grad_norm": 2.782299041748047, "learning_rate": 5.552068965517241e-07, "loss": 0.1395, "step": 267925 }, { "epoch": 2.63, "grad_norm": 11.5399169921875, "learning_rate": 5.547758620689656e-07, "loss": 0.0802, "step": 267950 }, { "epoch": 2.63, "grad_norm": 5.387665271759033, "learning_rate": 5.54344827586207e-07, "loss": 0.1488, "step": 267975 }, { "epoch": 2.64, "grad_norm": 9.462004661560059, "learning_rate": 5.539137931034483e-07, "loss": 0.1023, "step": 268000 }, { "epoch": 2.64, "grad_norm": 6.272113800048828, "learning_rate": 5.534827586206898e-07, "loss": 0.207, "step": 268025 }, { "epoch": 2.64, "grad_norm": 9.559403419494629, "learning_rate": 5.530517241379311e-07, "loss": 0.0936, "step": 268050 }, { "epoch": 2.64, "grad_norm": 6.232882976531982, "learning_rate": 5.526206896551725e-07, "loss": 0.1862, "step": 268075 }, { "epoch": 2.64, "grad_norm": 8.908910751342773, "learning_rate": 5.521896551724138e-07, "loss": 0.0863, "step": 268100 }, { "epoch": 2.64, "grad_norm": 7.241240501403809, "learning_rate": 5.517586206896553e-07, "loss": 0.2102, "step": 268125 }, { "epoch": 2.64, "grad_norm": 13.991432189941406, "learning_rate": 5.513275862068966e-07, "loss": 0.1084, "step": 268150 }, { "epoch": 2.64, "grad_norm": 1.017784833908081, "learning_rate": 5.50896551724138e-07, "loss": 0.1505, "step": 268175 }, { "epoch": 2.64, "grad_norm": 10.573189735412598, "learning_rate": 5.504655172413793e-07, "loss": 0.1015, "step": 268200 }, { "epoch": 2.64, "grad_norm": 12.611672401428223, "learning_rate": 5.500344827586207e-07, "loss": 0.275, "step": 268225 }, { "epoch": 2.64, "grad_norm": 7.858327865600586, "learning_rate": 5.496034482758621e-07, "loss": 0.1121, "step": 268250 }, { "epoch": 2.64, "grad_norm": 3.7377724647521973, "learning_rate": 5.491724137931035e-07, "loss": 0.1587, "step": 268275 }, { "epoch": 2.64, "grad_norm": 6.503598213195801, "learning_rate": 5.487413793103448e-07, "loss": 0.0946, "step": 268300 }, { "epoch": 2.64, "grad_norm": 3.2892425060272217, "learning_rate": 5.483103448275862e-07, "loss": 0.1719, "step": 268325 }, { "epoch": 2.64, "grad_norm": 10.839109420776367, "learning_rate": 5.478793103448276e-07, "loss": 0.1102, "step": 268350 }, { "epoch": 2.64, "grad_norm": 3.5594427585601807, "learning_rate": 5.47448275862069e-07, "loss": 0.174, "step": 268375 }, { "epoch": 2.64, "grad_norm": 8.630229949951172, "learning_rate": 5.470172413793104e-07, "loss": 0.1041, "step": 268400 }, { "epoch": 2.64, "grad_norm": 7.3795366287231445, "learning_rate": 5.465862068965518e-07, "loss": 0.1946, "step": 268425 }, { "epoch": 2.64, "grad_norm": 1.99299156665802, "learning_rate": 5.461551724137931e-07, "loss": 0.1068, "step": 268450 }, { "epoch": 2.64, "grad_norm": 4.7756667137146, "learning_rate": 5.457241379310346e-07, "loss": 0.1953, "step": 268475 }, { "epoch": 2.64, "grad_norm": 29.753799438476562, "learning_rate": 5.45293103448276e-07, "loss": 0.1005, "step": 268500 }, { "epoch": 2.64, "grad_norm": 6.260744094848633, "learning_rate": 5.448620689655173e-07, "loss": 0.1713, "step": 268525 }, { "epoch": 2.64, "grad_norm": 10.198236465454102, "learning_rate": 5.444310344827587e-07, "loss": 0.1118, "step": 268550 }, { "epoch": 2.64, "grad_norm": 3.1437737941741943, "learning_rate": 5.44e-07, "loss": 0.1845, "step": 268575 }, { "epoch": 2.64, "grad_norm": 12.660253524780273, "learning_rate": 5.435689655172415e-07, "loss": 0.0645, "step": 268600 }, { "epoch": 2.64, "grad_norm": 4.908366680145264, "learning_rate": 5.431379310344828e-07, "loss": 0.1467, "step": 268625 }, { "epoch": 2.64, "grad_norm": 9.243097305297852, "learning_rate": 5.427068965517242e-07, "loss": 0.093, "step": 268650 }, { "epoch": 2.64, "grad_norm": 3.2139892578125, "learning_rate": 5.422758620689655e-07, "loss": 0.1727, "step": 268675 }, { "epoch": 2.64, "grad_norm": 9.781295776367188, "learning_rate": 5.418448275862069e-07, "loss": 0.0793, "step": 268700 }, { "epoch": 2.64, "grad_norm": 3.8477025032043457, "learning_rate": 5.414137931034483e-07, "loss": 0.1749, "step": 268725 }, { "epoch": 2.64, "grad_norm": 10.507689476013184, "learning_rate": 5.409827586206897e-07, "loss": 0.0851, "step": 268750 }, { "epoch": 2.64, "grad_norm": 4.805885314941406, "learning_rate": 5.40551724137931e-07, "loss": 0.1967, "step": 268775 }, { "epoch": 2.64, "grad_norm": 9.77477741241455, "learning_rate": 5.401206896551725e-07, "loss": 0.1085, "step": 268800 }, { "epoch": 2.64, "grad_norm": 3.94309401512146, "learning_rate": 5.396896551724138e-07, "loss": 0.1873, "step": 268825 }, { "epoch": 2.64, "grad_norm": 8.7026948928833, "learning_rate": 5.392586206896553e-07, "loss": 0.089, "step": 268850 }, { "epoch": 2.64, "grad_norm": 4.831486701965332, "learning_rate": 5.388275862068966e-07, "loss": 0.2051, "step": 268875 }, { "epoch": 2.64, "grad_norm": 16.40460777282715, "learning_rate": 5.38396551724138e-07, "loss": 0.1126, "step": 268900 }, { "epoch": 2.64, "grad_norm": 0.12737853825092316, "learning_rate": 5.379655172413793e-07, "loss": 0.1994, "step": 268925 }, { "epoch": 2.64, "grad_norm": 13.061212539672852, "learning_rate": 5.375344827586208e-07, "loss": 0.0897, "step": 268950 }, { "epoch": 2.64, "grad_norm": 5.425416469573975, "learning_rate": 5.371034482758621e-07, "loss": 0.2199, "step": 268975 }, { "epoch": 2.64, "grad_norm": 11.592578887939453, "learning_rate": 5.366724137931035e-07, "loss": 0.1106, "step": 269000 }, { "epoch": 2.65, "grad_norm": 7.199222087860107, "learning_rate": 5.362413793103448e-07, "loss": 0.1771, "step": 269025 }, { "epoch": 2.65, "grad_norm": 10.453871726989746, "learning_rate": 5.358103448275862e-07, "loss": 0.0924, "step": 269050 }, { "epoch": 2.65, "grad_norm": 3.4728846549987793, "learning_rate": 5.353793103448276e-07, "loss": 0.1585, "step": 269075 }, { "epoch": 2.65, "grad_norm": 6.748846054077148, "learning_rate": 5.34948275862069e-07, "loss": 0.0791, "step": 269100 }, { "epoch": 2.65, "grad_norm": 3.0487186908721924, "learning_rate": 5.345172413793103e-07, "loss": 0.1556, "step": 269125 }, { "epoch": 2.65, "grad_norm": 4.928925514221191, "learning_rate": 5.340862068965517e-07, "loss": 0.1321, "step": 269150 }, { "epoch": 2.65, "grad_norm": 3.0936546325683594, "learning_rate": 5.336551724137932e-07, "loss": 0.2177, "step": 269175 }, { "epoch": 2.65, "grad_norm": 12.176101684570312, "learning_rate": 5.332241379310345e-07, "loss": 0.0824, "step": 269200 }, { "epoch": 2.65, "grad_norm": 2.9380381107330322, "learning_rate": 5.327931034482759e-07, "loss": 0.1634, "step": 269225 }, { "epoch": 2.65, "grad_norm": 12.272971153259277, "learning_rate": 5.323620689655173e-07, "loss": 0.1057, "step": 269250 }, { "epoch": 2.65, "grad_norm": 0.07159354537725449, "learning_rate": 5.319310344827587e-07, "loss": 0.1746, "step": 269275 }, { "epoch": 2.65, "grad_norm": 10.867674827575684, "learning_rate": 5.315000000000001e-07, "loss": 0.1076, "step": 269300 }, { "epoch": 2.65, "grad_norm": 2.1457321643829346, "learning_rate": 5.310689655172415e-07, "loss": 0.181, "step": 269325 }, { "epoch": 2.65, "grad_norm": 11.061662673950195, "learning_rate": 5.306379310344828e-07, "loss": 0.1133, "step": 269350 }, { "epoch": 2.65, "grad_norm": 1.801565408706665, "learning_rate": 5.302068965517242e-07, "loss": 0.1891, "step": 269375 }, { "epoch": 2.65, "grad_norm": 14.920011520385742, "learning_rate": 5.297758620689655e-07, "loss": 0.1187, "step": 269400 }, { "epoch": 2.65, "grad_norm": 3.807058811187744, "learning_rate": 5.29344827586207e-07, "loss": 0.1604, "step": 269425 }, { "epoch": 2.65, "grad_norm": 13.6696195602417, "learning_rate": 5.289137931034483e-07, "loss": 0.0828, "step": 269450 }, { "epoch": 2.65, "grad_norm": 4.6136794090271, "learning_rate": 5.285000000000001e-07, "loss": 0.2027, "step": 269475 }, { "epoch": 2.65, "grad_norm": 11.076070785522461, "learning_rate": 5.280689655172415e-07, "loss": 0.0932, "step": 269500 }, { "epoch": 2.65, "grad_norm": 3.636140823364258, "learning_rate": 5.276379310344828e-07, "loss": 0.2008, "step": 269525 }, { "epoch": 2.65, "grad_norm": 12.927018165588379, "learning_rate": 5.272068965517242e-07, "loss": 0.0907, "step": 269550 }, { "epoch": 2.65, "grad_norm": 2.8271124362945557, "learning_rate": 5.267758620689656e-07, "loss": 0.1578, "step": 269575 }, { "epoch": 2.65, "grad_norm": 11.113165855407715, "learning_rate": 5.26344827586207e-07, "loss": 0.0813, "step": 269600 }, { "epoch": 2.65, "grad_norm": 2.80135178565979, "learning_rate": 5.259137931034483e-07, "loss": 0.1672, "step": 269625 }, { "epoch": 2.65, "grad_norm": 13.102714538574219, "learning_rate": 5.254827586206897e-07, "loss": 0.0836, "step": 269650 }, { "epoch": 2.65, "grad_norm": 5.382194519042969, "learning_rate": 5.250517241379311e-07, "loss": 0.2008, "step": 269675 }, { "epoch": 2.65, "grad_norm": 8.800381660461426, "learning_rate": 5.246206896551725e-07, "loss": 0.0888, "step": 269700 }, { "epoch": 2.65, "grad_norm": 0.12523823976516724, "learning_rate": 5.241896551724138e-07, "loss": 0.1791, "step": 269725 }, { "epoch": 2.65, "grad_norm": 12.254355430603027, "learning_rate": 5.237586206896552e-07, "loss": 0.081, "step": 269750 }, { "epoch": 2.65, "grad_norm": 5.106848239898682, "learning_rate": 5.233275862068965e-07, "loss": 0.2092, "step": 269775 }, { "epoch": 2.65, "grad_norm": 8.784981727600098, "learning_rate": 5.22896551724138e-07, "loss": 0.0969, "step": 269800 }, { "epoch": 2.65, "grad_norm": 6.177005290985107, "learning_rate": 5.224655172413793e-07, "loss": 0.1412, "step": 269825 }, { "epoch": 2.65, "grad_norm": 9.84982967376709, "learning_rate": 5.220344827586207e-07, "loss": 0.0613, "step": 269850 }, { "epoch": 2.65, "grad_norm": 9.91327953338623, "learning_rate": 5.216034482758621e-07, "loss": 0.1884, "step": 269875 }, { "epoch": 2.65, "grad_norm": 13.382601737976074, "learning_rate": 5.211724137931035e-07, "loss": 0.1315, "step": 269900 }, { "epoch": 2.65, "grad_norm": 1.7432328462600708, "learning_rate": 5.207413793103449e-07, "loss": 0.1613, "step": 269925 }, { "epoch": 2.65, "grad_norm": 11.143198013305664, "learning_rate": 5.203103448275863e-07, "loss": 0.0854, "step": 269950 }, { "epoch": 2.65, "grad_norm": 3.5570108890533447, "learning_rate": 5.198793103448276e-07, "loss": 0.1844, "step": 269975 }, { "epoch": 2.65, "grad_norm": 13.603728294372559, "learning_rate": 5.19448275862069e-07, "loss": 0.1027, "step": 270000 }, { "epoch": 2.65, "grad_norm": 3.2119219303131104, "learning_rate": 5.190172413793105e-07, "loss": 0.155, "step": 270025 }, { "epoch": 2.66, "grad_norm": 11.398340225219727, "learning_rate": 5.185862068965518e-07, "loss": 0.0717, "step": 270050 }, { "epoch": 2.66, "grad_norm": 4.361978054046631, "learning_rate": 5.181551724137932e-07, "loss": 0.1502, "step": 270075 }, { "epoch": 2.66, "grad_norm": 5.893349647521973, "learning_rate": 5.177241379310345e-07, "loss": 0.0818, "step": 270100 }, { "epoch": 2.66, "grad_norm": 4.00339937210083, "learning_rate": 5.172931034482759e-07, "loss": 0.1953, "step": 270125 }, { "epoch": 2.66, "grad_norm": 7.7173051834106445, "learning_rate": 5.168620689655173e-07, "loss": 0.0849, "step": 270150 }, { "epoch": 2.66, "grad_norm": 4.641165733337402, "learning_rate": 5.164310344827587e-07, "loss": 0.1702, "step": 270175 }, { "epoch": 2.66, "grad_norm": 10.512491226196289, "learning_rate": 5.16e-07, "loss": 0.1001, "step": 270200 }, { "epoch": 2.66, "grad_norm": 3.677961826324463, "learning_rate": 5.155689655172414e-07, "loss": 0.1754, "step": 270225 }, { "epoch": 2.66, "grad_norm": 9.544182777404785, "learning_rate": 5.151379310344828e-07, "loss": 0.079, "step": 270250 }, { "epoch": 2.66, "grad_norm": 8.818194389343262, "learning_rate": 5.147068965517242e-07, "loss": 0.1718, "step": 270275 }, { "epoch": 2.66, "grad_norm": 6.489499568939209, "learning_rate": 5.142758620689655e-07, "loss": 0.0975, "step": 270300 }, { "epoch": 2.66, "grad_norm": 3.844404697418213, "learning_rate": 5.13844827586207e-07, "loss": 0.1564, "step": 270325 }, { "epoch": 2.66, "grad_norm": 10.758726119995117, "learning_rate": 5.134137931034483e-07, "loss": 0.1055, "step": 270350 }, { "epoch": 2.66, "grad_norm": 4.2537641525268555, "learning_rate": 5.129827586206897e-07, "loss": 0.1712, "step": 270375 }, { "epoch": 2.66, "grad_norm": 6.791684627532959, "learning_rate": 5.125517241379311e-07, "loss": 0.0943, "step": 270400 }, { "epoch": 2.66, "grad_norm": 3.288673162460327, "learning_rate": 5.121206896551725e-07, "loss": 0.2078, "step": 270425 }, { "epoch": 2.66, "grad_norm": 7.376685619354248, "learning_rate": 5.116896551724138e-07, "loss": 0.0925, "step": 270450 }, { "epoch": 2.66, "grad_norm": 1.7632189989089966, "learning_rate": 5.112586206896552e-07, "loss": 0.1674, "step": 270475 }, { "epoch": 2.66, "grad_norm": 6.562232494354248, "learning_rate": 5.108275862068966e-07, "loss": 0.0806, "step": 270500 }, { "epoch": 2.66, "grad_norm": 6.003345489501953, "learning_rate": 5.10396551724138e-07, "loss": 0.1667, "step": 270525 }, { "epoch": 2.66, "grad_norm": 10.766347885131836, "learning_rate": 5.099655172413793e-07, "loss": 0.1156, "step": 270550 }, { "epoch": 2.66, "grad_norm": 2.188062906265259, "learning_rate": 5.095344827586207e-07, "loss": 0.1992, "step": 270575 }, { "epoch": 2.66, "grad_norm": 19.975326538085938, "learning_rate": 5.09103448275862e-07, "loss": 0.1171, "step": 270600 }, { "epoch": 2.66, "grad_norm": 5.218530654907227, "learning_rate": 5.086724137931035e-07, "loss": 0.1414, "step": 270625 }, { "epoch": 2.66, "grad_norm": 11.911033630371094, "learning_rate": 5.082413793103449e-07, "loss": 0.0961, "step": 270650 }, { "epoch": 2.66, "grad_norm": 4.298792839050293, "learning_rate": 5.078103448275862e-07, "loss": 0.189, "step": 270675 }, { "epoch": 2.66, "grad_norm": 11.229650497436523, "learning_rate": 5.073793103448277e-07, "loss": 0.0947, "step": 270700 }, { "epoch": 2.66, "grad_norm": 6.194723129272461, "learning_rate": 5.06948275862069e-07, "loss": 0.167, "step": 270725 }, { "epoch": 2.66, "grad_norm": 10.18233871459961, "learning_rate": 5.065172413793104e-07, "loss": 0.1064, "step": 270750 }, { "epoch": 2.66, "grad_norm": 3.581666946411133, "learning_rate": 5.060862068965518e-07, "loss": 0.2064, "step": 270775 }, { "epoch": 2.66, "grad_norm": 9.176070213317871, "learning_rate": 5.056551724137932e-07, "loss": 0.0773, "step": 270800 }, { "epoch": 2.66, "grad_norm": 8.32312297821045, "learning_rate": 5.052241379310345e-07, "loss": 0.1842, "step": 270825 }, { "epoch": 2.66, "grad_norm": 9.736730575561523, "learning_rate": 5.04793103448276e-07, "loss": 0.0884, "step": 270850 }, { "epoch": 2.66, "grad_norm": 3.8241560459136963, "learning_rate": 5.043620689655173e-07, "loss": 0.1952, "step": 270875 }, { "epoch": 2.66, "grad_norm": 8.531893730163574, "learning_rate": 5.039310344827587e-07, "loss": 0.1174, "step": 270900 }, { "epoch": 2.66, "grad_norm": 1.4141368865966797, "learning_rate": 5.035e-07, "loss": 0.1521, "step": 270925 }, { "epoch": 2.66, "grad_norm": 19.225933074951172, "learning_rate": 5.030689655172414e-07, "loss": 0.0895, "step": 270950 }, { "epoch": 2.66, "grad_norm": 5.759907245635986, "learning_rate": 5.026379310344828e-07, "loss": 0.1662, "step": 270975 }, { "epoch": 2.66, "grad_norm": 12.196624755859375, "learning_rate": 5.022068965517242e-07, "loss": 0.0739, "step": 271000 }, { "epoch": 2.66, "grad_norm": 1.9801758527755737, "learning_rate": 5.017758620689655e-07, "loss": 0.1635, "step": 271025 }, { "epoch": 2.67, "grad_norm": 17.1451473236084, "learning_rate": 5.013448275862069e-07, "loss": 0.1026, "step": 271050 }, { "epoch": 2.67, "grad_norm": 3.4030704498291016, "learning_rate": 5.009137931034482e-07, "loss": 0.1685, "step": 271075 }, { "epoch": 2.67, "grad_norm": 12.06208610534668, "learning_rate": 5.004827586206897e-07, "loss": 0.1044, "step": 271100 }, { "epoch": 2.67, "grad_norm": 9.760117530822754, "learning_rate": 5.00051724137931e-07, "loss": 0.1798, "step": 271125 }, { "epoch": 2.67, "grad_norm": 10.633996963500977, "learning_rate": 4.996206896551725e-07, "loss": 0.1, "step": 271150 }, { "epoch": 2.67, "grad_norm": 8.416152000427246, "learning_rate": 4.991896551724139e-07, "loss": 0.1691, "step": 271175 }, { "epoch": 2.67, "grad_norm": 9.5836763381958, "learning_rate": 4.987586206896552e-07, "loss": 0.0995, "step": 271200 }, { "epoch": 2.67, "grad_norm": 2.378251791000366, "learning_rate": 4.983275862068967e-07, "loss": 0.1708, "step": 271225 }, { "epoch": 2.67, "grad_norm": 10.037246704101562, "learning_rate": 4.97896551724138e-07, "loss": 0.0945, "step": 271250 }, { "epoch": 2.67, "grad_norm": 5.8847551345825195, "learning_rate": 4.974655172413794e-07, "loss": 0.1902, "step": 271275 }, { "epoch": 2.67, "grad_norm": 11.538129806518555, "learning_rate": 4.970344827586207e-07, "loss": 0.0651, "step": 271300 }, { "epoch": 2.67, "grad_norm": 4.659960746765137, "learning_rate": 4.966034482758622e-07, "loss": 0.1853, "step": 271325 }, { "epoch": 2.67, "grad_norm": 14.070817947387695, "learning_rate": 4.961724137931035e-07, "loss": 0.1177, "step": 271350 }, { "epoch": 2.67, "grad_norm": 9.41923713684082, "learning_rate": 4.957413793103449e-07, "loss": 0.1688, "step": 271375 }, { "epoch": 2.67, "grad_norm": 13.090664863586426, "learning_rate": 4.953103448275862e-07, "loss": 0.101, "step": 271400 }, { "epoch": 2.67, "grad_norm": 2.983051300048828, "learning_rate": 4.948793103448276e-07, "loss": 0.1603, "step": 271425 }, { "epoch": 2.67, "grad_norm": 13.377031326293945, "learning_rate": 4.94448275862069e-07, "loss": 0.1438, "step": 271450 }, { "epoch": 2.67, "grad_norm": 6.470107555389404, "learning_rate": 4.940344827586208e-07, "loss": 0.197, "step": 271475 }, { "epoch": 2.67, "grad_norm": 8.723113059997559, "learning_rate": 4.936034482758622e-07, "loss": 0.096, "step": 271500 }, { "epoch": 2.67, "grad_norm": 4.291405200958252, "learning_rate": 4.931724137931035e-07, "loss": 0.1492, "step": 271525 }, { "epoch": 2.67, "grad_norm": 14.551003456115723, "learning_rate": 4.927413793103449e-07, "loss": 0.1011, "step": 271550 }, { "epoch": 2.67, "grad_norm": 3.607250452041626, "learning_rate": 4.923103448275862e-07, "loss": 0.2029, "step": 271575 }, { "epoch": 2.67, "grad_norm": 11.583322525024414, "learning_rate": 4.918793103448277e-07, "loss": 0.0828, "step": 271600 }, { "epoch": 2.67, "grad_norm": 1.2510536909103394, "learning_rate": 4.91448275862069e-07, "loss": 0.187, "step": 271625 }, { "epoch": 2.67, "grad_norm": 20.956464767456055, "learning_rate": 4.910172413793104e-07, "loss": 0.1049, "step": 271650 }, { "epoch": 2.67, "grad_norm": 1.6085699796676636, "learning_rate": 4.905862068965517e-07, "loss": 0.1626, "step": 271675 }, { "epoch": 2.67, "grad_norm": 10.533870697021484, "learning_rate": 4.901551724137931e-07, "loss": 0.0892, "step": 271700 }, { "epoch": 2.67, "grad_norm": 6.719126224517822, "learning_rate": 4.897241379310345e-07, "loss": 0.1492, "step": 271725 }, { "epoch": 2.67, "grad_norm": 5.906526565551758, "learning_rate": 4.892931034482759e-07, "loss": 0.11, "step": 271750 }, { "epoch": 2.67, "grad_norm": 4.450874328613281, "learning_rate": 4.888620689655172e-07, "loss": 0.1863, "step": 271775 }, { "epoch": 2.67, "grad_norm": 8.061332702636719, "learning_rate": 4.884310344827587e-07, "loss": 0.0888, "step": 271800 }, { "epoch": 2.67, "grad_norm": 5.448086738586426, "learning_rate": 4.88e-07, "loss": 0.1246, "step": 271825 }, { "epoch": 2.67, "grad_norm": 11.962695121765137, "learning_rate": 4.875689655172415e-07, "loss": 0.1137, "step": 271850 }, { "epoch": 2.67, "grad_norm": 60.4663200378418, "learning_rate": 4.871379310344828e-07, "loss": 0.1882, "step": 271875 }, { "epoch": 2.67, "grad_norm": 4.946866512298584, "learning_rate": 4.867068965517242e-07, "loss": 0.118, "step": 271900 }, { "epoch": 2.67, "grad_norm": 5.5967183113098145, "learning_rate": 4.862758620689655e-07, "loss": 0.1751, "step": 271925 }, { "epoch": 2.67, "grad_norm": 3.929089307785034, "learning_rate": 4.85844827586207e-07, "loss": 0.0906, "step": 271950 }, { "epoch": 2.67, "grad_norm": 5.722626686096191, "learning_rate": 4.854137931034483e-07, "loss": 0.176, "step": 271975 }, { "epoch": 2.67, "grad_norm": 10.230844497680664, "learning_rate": 4.849827586206897e-07, "loss": 0.0976, "step": 272000 }, { "epoch": 2.67, "grad_norm": 6.876797676086426, "learning_rate": 4.84551724137931e-07, "loss": 0.1477, "step": 272025 }, { "epoch": 2.67, "grad_norm": 10.710905075073242, "learning_rate": 4.841206896551724e-07, "loss": 0.1046, "step": 272050 }, { "epoch": 2.68, "grad_norm": 45.49031066894531, "learning_rate": 4.836896551724139e-07, "loss": 0.1758, "step": 272075 }, { "epoch": 2.68, "grad_norm": 8.191593170166016, "learning_rate": 4.832586206896552e-07, "loss": 0.1173, "step": 272100 }, { "epoch": 2.68, "grad_norm": 4.702094078063965, "learning_rate": 4.828275862068966e-07, "loss": 0.1742, "step": 272125 }, { "epoch": 2.68, "grad_norm": 8.753379821777344, "learning_rate": 4.823965517241379e-07, "loss": 0.0889, "step": 272150 }, { "epoch": 2.68, "grad_norm": 1.2249341011047363, "learning_rate": 4.819655172413794e-07, "loss": 0.1466, "step": 272175 }, { "epoch": 2.68, "grad_norm": 9.233675956726074, "learning_rate": 4.815344827586207e-07, "loss": 0.0826, "step": 272200 }, { "epoch": 2.68, "grad_norm": 1.1178563833236694, "learning_rate": 4.811034482758621e-07, "loss": 0.1918, "step": 272225 }, { "epoch": 2.68, "grad_norm": 8.938695907592773, "learning_rate": 4.806724137931035e-07, "loss": 0.1088, "step": 272250 }, { "epoch": 2.68, "grad_norm": 5.103480815887451, "learning_rate": 4.802413793103449e-07, "loss": 0.1335, "step": 272275 }, { "epoch": 2.68, "grad_norm": 9.808921813964844, "learning_rate": 4.798103448275863e-07, "loss": 0.0926, "step": 272300 }, { "epoch": 2.68, "grad_norm": 4.147418022155762, "learning_rate": 4.793793103448277e-07, "loss": 0.146, "step": 272325 }, { "epoch": 2.68, "grad_norm": 10.300923347473145, "learning_rate": 4.78948275862069e-07, "loss": 0.0971, "step": 272350 }, { "epoch": 2.68, "grad_norm": 6.569786548614502, "learning_rate": 4.785172413793104e-07, "loss": 0.1738, "step": 272375 }, { "epoch": 2.68, "grad_norm": 4.177555084228516, "learning_rate": 4.780862068965517e-07, "loss": 0.0971, "step": 272400 }, { "epoch": 2.68, "grad_norm": 7.853904724121094, "learning_rate": 4.776551724137932e-07, "loss": 0.1825, "step": 272425 }, { "epoch": 2.68, "grad_norm": 4.9238505363464355, "learning_rate": 4.772241379310345e-07, "loss": 0.0864, "step": 272450 }, { "epoch": 2.68, "grad_norm": 2.844587564468384, "learning_rate": 4.767931034482759e-07, "loss": 0.1664, "step": 272475 }, { "epoch": 2.68, "grad_norm": 6.301036834716797, "learning_rate": 4.7636206896551724e-07, "loss": 0.1008, "step": 272500 }, { "epoch": 2.68, "grad_norm": 4.938023567199707, "learning_rate": 4.759310344827587e-07, "loss": 0.1935, "step": 272525 }, { "epoch": 2.68, "grad_norm": 1.4416383504867554, "learning_rate": 4.7550000000000004e-07, "loss": 0.1024, "step": 272550 }, { "epoch": 2.68, "grad_norm": 6.48694372177124, "learning_rate": 4.7506896551724145e-07, "loss": 0.1502, "step": 272575 }, { "epoch": 2.68, "grad_norm": 9.372945785522461, "learning_rate": 4.746379310344828e-07, "loss": 0.1132, "step": 272600 }, { "epoch": 2.68, "grad_norm": 5.469147682189941, "learning_rate": 4.7420689655172415e-07, "loss": 0.1684, "step": 272625 }, { "epoch": 2.68, "grad_norm": 6.5949320793151855, "learning_rate": 4.737758620689656e-07, "loss": 0.0946, "step": 272650 }, { "epoch": 2.68, "grad_norm": 6.657181739807129, "learning_rate": 4.7334482758620695e-07, "loss": 0.2041, "step": 272675 }, { "epoch": 2.68, "grad_norm": 12.675542831420898, "learning_rate": 4.729137931034483e-07, "loss": 0.0975, "step": 272700 }, { "epoch": 2.68, "grad_norm": 2.561708688735962, "learning_rate": 4.7248275862068966e-07, "loss": 0.1583, "step": 272725 }, { "epoch": 2.68, "grad_norm": 9.857803344726562, "learning_rate": 4.7205172413793106e-07, "loss": 0.0914, "step": 272750 }, { "epoch": 2.68, "grad_norm": 1.7209423780441284, "learning_rate": 4.7162068965517246e-07, "loss": 0.1915, "step": 272775 }, { "epoch": 2.68, "grad_norm": 13.755669593811035, "learning_rate": 4.7118965517241387e-07, "loss": 0.1066, "step": 272800 }, { "epoch": 2.68, "grad_norm": 4.184879302978516, "learning_rate": 4.707586206896552e-07, "loss": 0.1726, "step": 272825 }, { "epoch": 2.68, "grad_norm": 10.253052711486816, "learning_rate": 4.7032758620689657e-07, "loss": 0.093, "step": 272850 }, { "epoch": 2.68, "grad_norm": 2.9691011905670166, "learning_rate": 4.698965517241379e-07, "loss": 0.1509, "step": 272875 }, { "epoch": 2.68, "grad_norm": 7.368338584899902, "learning_rate": 4.694655172413794e-07, "loss": 0.0998, "step": 272900 }, { "epoch": 2.68, "grad_norm": 3.317038059234619, "learning_rate": 4.690344827586207e-07, "loss": 0.1585, "step": 272925 }, { "epoch": 2.68, "grad_norm": 14.126041412353516, "learning_rate": 4.686034482758621e-07, "loss": 0.0799, "step": 272950 }, { "epoch": 2.68, "grad_norm": 5.500337600708008, "learning_rate": 4.681724137931035e-07, "loss": 0.1647, "step": 272975 }, { "epoch": 2.68, "grad_norm": 17.229917526245117, "learning_rate": 4.677413793103449e-07, "loss": 0.0981, "step": 273000 }, { "epoch": 2.68, "grad_norm": 3.2712512016296387, "learning_rate": 4.673103448275863e-07, "loss": 0.153, "step": 273025 }, { "epoch": 2.68, "grad_norm": 6.611260414123535, "learning_rate": 4.6687931034482764e-07, "loss": 0.1021, "step": 273050 }, { "epoch": 2.68, "grad_norm": 6.191993236541748, "learning_rate": 4.66448275862069e-07, "loss": 0.2029, "step": 273075 }, { "epoch": 2.69, "grad_norm": 10.370655059814453, "learning_rate": 4.6601724137931034e-07, "loss": 0.0851, "step": 273100 }, { "epoch": 2.69, "grad_norm": 0.24413146078586578, "learning_rate": 4.655862068965518e-07, "loss": 0.1599, "step": 273125 }, { "epoch": 2.69, "grad_norm": 12.319316864013672, "learning_rate": 4.6515517241379314e-07, "loss": 0.0966, "step": 273150 }, { "epoch": 2.69, "grad_norm": 6.709877967834473, "learning_rate": 4.647241379310345e-07, "loss": 0.1466, "step": 273175 }, { "epoch": 2.69, "grad_norm": 12.525768280029297, "learning_rate": 4.642931034482759e-07, "loss": 0.1187, "step": 273200 }, { "epoch": 2.69, "grad_norm": 0.2818695604801178, "learning_rate": 4.6386206896551725e-07, "loss": 0.1817, "step": 273225 }, { "epoch": 2.69, "grad_norm": 14.12312126159668, "learning_rate": 4.634310344827587e-07, "loss": 0.0662, "step": 273250 }, { "epoch": 2.69, "grad_norm": 4.656497478485107, "learning_rate": 4.6300000000000006e-07, "loss": 0.1871, "step": 273275 }, { "epoch": 2.69, "grad_norm": 13.238561630249023, "learning_rate": 4.625689655172414e-07, "loss": 0.1129, "step": 273300 }, { "epoch": 2.69, "grad_norm": 1.1564960479736328, "learning_rate": 4.6213793103448276e-07, "loss": 0.2083, "step": 273325 }, { "epoch": 2.69, "grad_norm": 8.661458015441895, "learning_rate": 4.617068965517242e-07, "loss": 0.0692, "step": 273350 }, { "epoch": 2.69, "grad_norm": 0.7589114308357239, "learning_rate": 4.6127586206896556e-07, "loss": 0.181, "step": 273375 }, { "epoch": 2.69, "grad_norm": 10.756219863891602, "learning_rate": 4.608448275862069e-07, "loss": 0.0877, "step": 273400 }, { "epoch": 2.69, "grad_norm": 3.609502077102661, "learning_rate": 4.604137931034483e-07, "loss": 0.1757, "step": 273425 }, { "epoch": 2.69, "grad_norm": 7.659801006317139, "learning_rate": 4.5998275862068967e-07, "loss": 0.0931, "step": 273450 }, { "epoch": 2.69, "grad_norm": 3.269070863723755, "learning_rate": 4.595517241379311e-07, "loss": 0.1599, "step": 273475 }, { "epoch": 2.69, "grad_norm": 10.04604434967041, "learning_rate": 4.591206896551725e-07, "loss": 0.1115, "step": 273500 }, { "epoch": 2.69, "grad_norm": 5.0316667556762695, "learning_rate": 4.5868965517241383e-07, "loss": 0.1777, "step": 273525 }, { "epoch": 2.69, "grad_norm": 4.961092472076416, "learning_rate": 4.582586206896552e-07, "loss": 0.0798, "step": 273550 }, { "epoch": 2.69, "grad_norm": 0.7752677202224731, "learning_rate": 4.578275862068966e-07, "loss": 0.1756, "step": 273575 }, { "epoch": 2.69, "grad_norm": 7.455235481262207, "learning_rate": 4.57396551724138e-07, "loss": 0.0937, "step": 273600 }, { "epoch": 2.69, "grad_norm": 3.1480677127838135, "learning_rate": 4.5696551724137934e-07, "loss": 0.2093, "step": 273625 }, { "epoch": 2.69, "grad_norm": 10.731689453125, "learning_rate": 4.5653448275862074e-07, "loss": 0.0879, "step": 273650 }, { "epoch": 2.69, "grad_norm": 5.550832748413086, "learning_rate": 4.561034482758621e-07, "loss": 0.2011, "step": 273675 }, { "epoch": 2.69, "grad_norm": 26.973527908325195, "learning_rate": 4.5567241379310344e-07, "loss": 0.0794, "step": 273700 }, { "epoch": 2.69, "grad_norm": 0.6027733087539673, "learning_rate": 4.552413793103449e-07, "loss": 0.2377, "step": 273725 }, { "epoch": 2.69, "grad_norm": 12.495388984680176, "learning_rate": 4.5481034482758625e-07, "loss": 0.1026, "step": 273750 }, { "epoch": 2.69, "grad_norm": 4.849574565887451, "learning_rate": 4.543793103448276e-07, "loss": 0.1751, "step": 273775 }, { "epoch": 2.69, "grad_norm": 3.2679622173309326, "learning_rate": 4.53948275862069e-07, "loss": 0.0737, "step": 273800 }, { "epoch": 2.69, "grad_norm": 4.135931015014648, "learning_rate": 4.535172413793104e-07, "loss": 0.1863, "step": 273825 }, { "epoch": 2.69, "grad_norm": 9.69795036315918, "learning_rate": 4.5308620689655175e-07, "loss": 0.0982, "step": 273850 }, { "epoch": 2.69, "grad_norm": 6.494994640350342, "learning_rate": 4.5265517241379316e-07, "loss": 0.1644, "step": 273875 }, { "epoch": 2.69, "grad_norm": 16.10360336303711, "learning_rate": 4.522241379310345e-07, "loss": 0.0949, "step": 273900 }, { "epoch": 2.69, "grad_norm": 0.11636881530284882, "learning_rate": 4.5181034482758623e-07, "loss": 0.1517, "step": 273925 }, { "epoch": 2.69, "grad_norm": 7.9739179611206055, "learning_rate": 4.513793103448276e-07, "loss": 0.0999, "step": 273950 }, { "epoch": 2.69, "grad_norm": 3.9389712810516357, "learning_rate": 4.5094827586206904e-07, "loss": 0.1794, "step": 273975 }, { "epoch": 2.69, "grad_norm": 17.531986236572266, "learning_rate": 4.505172413793104e-07, "loss": 0.1219, "step": 274000 }, { "epoch": 2.69, "grad_norm": 6.717219352722168, "learning_rate": 4.5008620689655174e-07, "loss": 0.2235, "step": 274025 }, { "epoch": 2.69, "grad_norm": 10.402467727661133, "learning_rate": 4.4965517241379314e-07, "loss": 0.0826, "step": 274050 }, { "epoch": 2.69, "grad_norm": 2.5416347980499268, "learning_rate": 4.492241379310345e-07, "loss": 0.1584, "step": 274075 }, { "epoch": 2.69, "grad_norm": 10.23206615447998, "learning_rate": 4.487931034482759e-07, "loss": 0.1587, "step": 274100 }, { "epoch": 2.7, "grad_norm": 7.272228717803955, "learning_rate": 4.483620689655173e-07, "loss": 0.1799, "step": 274125 }, { "epoch": 2.7, "grad_norm": 11.165522575378418, "learning_rate": 4.4793103448275865e-07, "loss": 0.0846, "step": 274150 }, { "epoch": 2.7, "grad_norm": 3.0723326206207275, "learning_rate": 4.475e-07, "loss": 0.1443, "step": 274175 }, { "epoch": 2.7, "grad_norm": 10.610433578491211, "learning_rate": 4.470689655172414e-07, "loss": 0.0894, "step": 274200 }, { "epoch": 2.7, "grad_norm": 2.787480354309082, "learning_rate": 4.466379310344828e-07, "loss": 0.1731, "step": 274225 }, { "epoch": 2.7, "grad_norm": 12.256756782531738, "learning_rate": 4.4620689655172416e-07, "loss": 0.1311, "step": 274250 }, { "epoch": 2.7, "grad_norm": 3.9879605770111084, "learning_rate": 4.4577586206896556e-07, "loss": 0.163, "step": 274275 }, { "epoch": 2.7, "grad_norm": 12.082927703857422, "learning_rate": 4.453448275862069e-07, "loss": 0.0991, "step": 274300 }, { "epoch": 2.7, "grad_norm": 2.007833957672119, "learning_rate": 4.449137931034483e-07, "loss": 0.1605, "step": 274325 }, { "epoch": 2.7, "grad_norm": 7.756366729736328, "learning_rate": 4.444827586206897e-07, "loss": 0.0853, "step": 274350 }, { "epoch": 2.7, "grad_norm": 4.415721416473389, "learning_rate": 4.4405172413793107e-07, "loss": 0.17, "step": 274375 }, { "epoch": 2.7, "grad_norm": 14.952302932739258, "learning_rate": 4.436206896551724e-07, "loss": 0.1233, "step": 274400 }, { "epoch": 2.7, "grad_norm": 1.8582398891448975, "learning_rate": 4.4318965517241383e-07, "loss": 0.1365, "step": 274425 }, { "epoch": 2.7, "grad_norm": 10.657377243041992, "learning_rate": 4.4275862068965523e-07, "loss": 0.1102, "step": 274450 }, { "epoch": 2.7, "grad_norm": 8.800382614135742, "learning_rate": 4.423275862068966e-07, "loss": 0.1608, "step": 274475 }, { "epoch": 2.7, "grad_norm": 8.781957626342773, "learning_rate": 4.41896551724138e-07, "loss": 0.0813, "step": 274500 }, { "epoch": 2.7, "grad_norm": 2.529771089553833, "learning_rate": 4.4146551724137934e-07, "loss": 0.1802, "step": 274525 }, { "epoch": 2.7, "grad_norm": 11.5161771774292, "learning_rate": 4.410344827586207e-07, "loss": 0.0804, "step": 274550 }, { "epoch": 2.7, "grad_norm": 6.350882530212402, "learning_rate": 4.4060344827586214e-07, "loss": 0.1701, "step": 274575 }, { "epoch": 2.7, "grad_norm": 10.900217056274414, "learning_rate": 4.401724137931035e-07, "loss": 0.1134, "step": 274600 }, { "epoch": 2.7, "grad_norm": 3.651308536529541, "learning_rate": 4.3974137931034484e-07, "loss": 0.1733, "step": 274625 }, { "epoch": 2.7, "grad_norm": 9.885851860046387, "learning_rate": 4.3931034482758625e-07, "loss": 0.1059, "step": 274650 }, { "epoch": 2.7, "grad_norm": 3.4437170028686523, "learning_rate": 4.3887931034482765e-07, "loss": 0.1465, "step": 274675 }, { "epoch": 2.7, "grad_norm": 14.496506690979004, "learning_rate": 4.38448275862069e-07, "loss": 0.0844, "step": 274700 }, { "epoch": 2.7, "grad_norm": 5.602385520935059, "learning_rate": 4.380172413793104e-07, "loss": 0.1367, "step": 274725 }, { "epoch": 2.7, "grad_norm": 11.0258150100708, "learning_rate": 4.3758620689655176e-07, "loss": 0.1133, "step": 274750 }, { "epoch": 2.7, "grad_norm": 3.0871620178222656, "learning_rate": 4.371551724137931e-07, "loss": 0.1751, "step": 274775 }, { "epoch": 2.7, "grad_norm": 10.07815170288086, "learning_rate": 4.3672413793103456e-07, "loss": 0.1185, "step": 274800 }, { "epoch": 2.7, "grad_norm": 1.3902524709701538, "learning_rate": 4.362931034482759e-07, "loss": 0.1618, "step": 274825 }, { "epoch": 2.7, "grad_norm": 13.036667823791504, "learning_rate": 4.3586206896551726e-07, "loss": 0.1278, "step": 274850 }, { "epoch": 2.7, "grad_norm": 4.234474182128906, "learning_rate": 4.3543103448275867e-07, "loss": 0.1803, "step": 274875 }, { "epoch": 2.7, "grad_norm": 9.023462295532227, "learning_rate": 4.35e-07, "loss": 0.1242, "step": 274900 }, { "epoch": 2.7, "grad_norm": 4.344781398773193, "learning_rate": 4.345689655172414e-07, "loss": 0.1664, "step": 274925 }, { "epoch": 2.7, "grad_norm": 12.775615692138672, "learning_rate": 4.341379310344828e-07, "loss": 0.0916, "step": 274950 }, { "epoch": 2.7, "grad_norm": 2.735668182373047, "learning_rate": 4.337068965517242e-07, "loss": 0.1964, "step": 274975 }, { "epoch": 2.7, "grad_norm": 5.518528938293457, "learning_rate": 4.332758620689655e-07, "loss": 0.0798, "step": 275000 }, { "epoch": 2.7, "grad_norm": 4.935347080230713, "learning_rate": 4.328448275862069e-07, "loss": 0.1727, "step": 275025 }, { "epoch": 2.7, "grad_norm": 9.503621101379395, "learning_rate": 4.3241379310344833e-07, "loss": 0.0608, "step": 275050 }, { "epoch": 2.7, "grad_norm": 2.164931535720825, "learning_rate": 4.319827586206897e-07, "loss": 0.1459, "step": 275075 }, { "epoch": 2.7, "grad_norm": 9.332815170288086, "learning_rate": 4.315517241379311e-07, "loss": 0.0842, "step": 275100 }, { "epoch": 2.71, "grad_norm": 5.364612579345703, "learning_rate": 4.3112068965517244e-07, "loss": 0.1926, "step": 275125 }, { "epoch": 2.71, "grad_norm": 11.729472160339355, "learning_rate": 4.3068965517241384e-07, "loss": 0.0852, "step": 275150 }, { "epoch": 2.71, "grad_norm": 5.98704195022583, "learning_rate": 4.3025862068965524e-07, "loss": 0.1886, "step": 275175 }, { "epoch": 2.71, "grad_norm": 7.269370079040527, "learning_rate": 4.298275862068966e-07, "loss": 0.0912, "step": 275200 }, { "epoch": 2.71, "grad_norm": 0.30909496545791626, "learning_rate": 4.2939655172413795e-07, "loss": 0.2159, "step": 275225 }, { "epoch": 2.71, "grad_norm": 13.383541107177734, "learning_rate": 4.289655172413793e-07, "loss": 0.1222, "step": 275250 }, { "epoch": 2.71, "grad_norm": 8.101436614990234, "learning_rate": 4.2853448275862075e-07, "loss": 0.1836, "step": 275275 }, { "epoch": 2.71, "grad_norm": 12.272833824157715, "learning_rate": 4.281034482758621e-07, "loss": 0.0857, "step": 275300 }, { "epoch": 2.71, "grad_norm": 3.901855707168579, "learning_rate": 4.276724137931035e-07, "loss": 0.2033, "step": 275325 }, { "epoch": 2.71, "grad_norm": 16.069623947143555, "learning_rate": 4.2724137931034486e-07, "loss": 0.0997, "step": 275350 }, { "epoch": 2.71, "grad_norm": 5.715739727020264, "learning_rate": 4.268103448275862e-07, "loss": 0.1452, "step": 275375 }, { "epoch": 2.71, "grad_norm": 6.469614028930664, "learning_rate": 4.2637931034482766e-07, "loss": 0.0788, "step": 275400 }, { "epoch": 2.71, "grad_norm": 4.070469856262207, "learning_rate": 4.25948275862069e-07, "loss": 0.1552, "step": 275425 }, { "epoch": 2.71, "grad_norm": 9.8574857711792, "learning_rate": 4.2551724137931037e-07, "loss": 0.088, "step": 275450 }, { "epoch": 2.71, "grad_norm": 7.30880880355835, "learning_rate": 4.250862068965517e-07, "loss": 0.1552, "step": 275475 }, { "epoch": 2.71, "grad_norm": 5.746721267700195, "learning_rate": 4.2465517241379317e-07, "loss": 0.1017, "step": 275500 }, { "epoch": 2.71, "grad_norm": 2.8510682582855225, "learning_rate": 4.242241379310345e-07, "loss": 0.1702, "step": 275525 }, { "epoch": 2.71, "grad_norm": 9.46574592590332, "learning_rate": 4.237931034482759e-07, "loss": 0.0654, "step": 275550 }, { "epoch": 2.71, "grad_norm": 2.287217378616333, "learning_rate": 4.233620689655173e-07, "loss": 0.1852, "step": 275575 }, { "epoch": 2.71, "grad_norm": 10.078706741333008, "learning_rate": 4.2293103448275863e-07, "loss": 0.0903, "step": 275600 }, { "epoch": 2.71, "grad_norm": 2.8796017169952393, "learning_rate": 4.225000000000001e-07, "loss": 0.1883, "step": 275625 }, { "epoch": 2.71, "grad_norm": 8.046236038208008, "learning_rate": 4.2206896551724143e-07, "loss": 0.1147, "step": 275650 }, { "epoch": 2.71, "grad_norm": 4.637323379516602, "learning_rate": 4.216379310344828e-07, "loss": 0.1981, "step": 275675 }, { "epoch": 2.71, "grad_norm": 13.513072967529297, "learning_rate": 4.2120689655172414e-07, "loss": 0.1014, "step": 275700 }, { "epoch": 2.71, "grad_norm": 4.955990314483643, "learning_rate": 4.2077586206896554e-07, "loss": 0.1642, "step": 275725 }, { "epoch": 2.71, "grad_norm": 5.320460796356201, "learning_rate": 4.2034482758620694e-07, "loss": 0.0965, "step": 275750 }, { "epoch": 2.71, "grad_norm": 2.949463129043579, "learning_rate": 4.199137931034483e-07, "loss": 0.1923, "step": 275775 }, { "epoch": 2.71, "grad_norm": 9.70336627960205, "learning_rate": 4.194827586206897e-07, "loss": 0.0759, "step": 275800 }, { "epoch": 2.71, "grad_norm": 4.560792446136475, "learning_rate": 4.1905172413793105e-07, "loss": 0.158, "step": 275825 }, { "epoch": 2.71, "grad_norm": 10.76628589630127, "learning_rate": 4.186206896551725e-07, "loss": 0.0699, "step": 275850 }, { "epoch": 2.71, "grad_norm": 2.0724828243255615, "learning_rate": 4.1818965517241385e-07, "loss": 0.1753, "step": 275875 }, { "epoch": 2.71, "grad_norm": 7.184351444244385, "learning_rate": 4.177586206896552e-07, "loss": 0.1073, "step": 275900 }, { "epoch": 2.71, "grad_norm": 5.986196041107178, "learning_rate": 4.1732758620689656e-07, "loss": 0.1765, "step": 275925 }, { "epoch": 2.71, "grad_norm": 12.366711616516113, "learning_rate": 4.1689655172413796e-07, "loss": 0.0754, "step": 275950 }, { "epoch": 2.71, "grad_norm": 4.6226983070373535, "learning_rate": 4.1646551724137936e-07, "loss": 0.1934, "step": 275975 }, { "epoch": 2.71, "grad_norm": 7.994744777679443, "learning_rate": 4.160344827586207e-07, "loss": 0.1209, "step": 276000 }, { "epoch": 2.71, "grad_norm": 5.9567670822143555, "learning_rate": 4.156034482758621e-07, "loss": 0.1643, "step": 276025 }, { "epoch": 2.71, "grad_norm": 13.136282920837402, "learning_rate": 4.1517241379310347e-07, "loss": 0.0927, "step": 276050 }, { "epoch": 2.71, "grad_norm": 2.2479300498962402, "learning_rate": 4.147413793103448e-07, "loss": 0.1836, "step": 276075 }, { "epoch": 2.71, "grad_norm": 1.088531732559204, "learning_rate": 4.143103448275863e-07, "loss": 0.105, "step": 276100 }, { "epoch": 2.71, "grad_norm": 0.42705005407333374, "learning_rate": 4.138793103448276e-07, "loss": 0.1966, "step": 276125 }, { "epoch": 2.72, "grad_norm": 13.548666954040527, "learning_rate": 4.13448275862069e-07, "loss": 0.1068, "step": 276150 }, { "epoch": 2.72, "grad_norm": 3.7130134105682373, "learning_rate": 4.130172413793104e-07, "loss": 0.2073, "step": 276175 }, { "epoch": 2.72, "grad_norm": 13.27729606628418, "learning_rate": 4.1258620689655173e-07, "loss": 0.1054, "step": 276200 }, { "epoch": 2.72, "grad_norm": 5.636460304260254, "learning_rate": 4.1215517241379313e-07, "loss": 0.1541, "step": 276225 }, { "epoch": 2.72, "grad_norm": 12.133159637451172, "learning_rate": 4.1172413793103454e-07, "loss": 0.0909, "step": 276250 }, { "epoch": 2.72, "grad_norm": 3.700376510620117, "learning_rate": 4.112931034482759e-07, "loss": 0.1605, "step": 276275 }, { "epoch": 2.72, "grad_norm": 10.580060958862305, "learning_rate": 4.1086206896551724e-07, "loss": 0.0848, "step": 276300 }, { "epoch": 2.72, "grad_norm": 0.754286527633667, "learning_rate": 4.104310344827587e-07, "loss": 0.1793, "step": 276325 }, { "epoch": 2.72, "grad_norm": 4.670175075531006, "learning_rate": 4.1000000000000004e-07, "loss": 0.095, "step": 276350 }, { "epoch": 2.72, "grad_norm": 2.5315191745758057, "learning_rate": 4.0958620689655177e-07, "loss": 0.1812, "step": 276375 }, { "epoch": 2.72, "grad_norm": 12.533266067504883, "learning_rate": 4.091551724137931e-07, "loss": 0.075, "step": 276400 }, { "epoch": 2.72, "grad_norm": 0.8870700597763062, "learning_rate": 4.087241379310345e-07, "loss": 0.1603, "step": 276425 }, { "epoch": 2.72, "grad_norm": 32.848514556884766, "learning_rate": 4.082931034482759e-07, "loss": 0.1355, "step": 276450 }, { "epoch": 2.72, "grad_norm": 4.0541181564331055, "learning_rate": 4.0786206896551733e-07, "loss": 0.1748, "step": 276475 }, { "epoch": 2.72, "grad_norm": 7.6124444007873535, "learning_rate": 4.074310344827587e-07, "loss": 0.0866, "step": 276500 }, { "epoch": 2.72, "grad_norm": 2.5177597999572754, "learning_rate": 4.0700000000000003e-07, "loss": 0.1479, "step": 276525 }, { "epoch": 2.72, "grad_norm": 9.341069221496582, "learning_rate": 4.065689655172414e-07, "loss": 0.0732, "step": 276550 }, { "epoch": 2.72, "grad_norm": 4.476883411407471, "learning_rate": 4.061379310344828e-07, "loss": 0.2007, "step": 276575 }, { "epoch": 2.72, "grad_norm": 16.681720733642578, "learning_rate": 4.057068965517242e-07, "loss": 0.0944, "step": 276600 }, { "epoch": 2.72, "grad_norm": 6.830862045288086, "learning_rate": 4.0527586206896554e-07, "loss": 0.1569, "step": 276625 }, { "epoch": 2.72, "grad_norm": 12.413870811462402, "learning_rate": 4.0484482758620694e-07, "loss": 0.1084, "step": 276650 }, { "epoch": 2.72, "grad_norm": 1.8369606733322144, "learning_rate": 4.044137931034483e-07, "loss": 0.1376, "step": 276675 }, { "epoch": 2.72, "grad_norm": 7.054813861846924, "learning_rate": 4.0398275862068964e-07, "loss": 0.0735, "step": 276700 }, { "epoch": 2.72, "grad_norm": 3.6162798404693604, "learning_rate": 4.035517241379311e-07, "loss": 0.1689, "step": 276725 }, { "epoch": 2.72, "grad_norm": 10.564518928527832, "learning_rate": 4.0312068965517245e-07, "loss": 0.1031, "step": 276750 }, { "epoch": 2.72, "grad_norm": 0.8609318137168884, "learning_rate": 4.026896551724138e-07, "loss": 0.1789, "step": 276775 }, { "epoch": 2.72, "grad_norm": 13.991867065429688, "learning_rate": 4.022586206896552e-07, "loss": 0.0943, "step": 276800 }, { "epoch": 2.72, "grad_norm": 7.8855299949646, "learning_rate": 4.018275862068966e-07, "loss": 0.1952, "step": 276825 }, { "epoch": 2.72, "grad_norm": 11.09062671661377, "learning_rate": 4.0139655172413796e-07, "loss": 0.0768, "step": 276850 }, { "epoch": 2.72, "grad_norm": 4.357063293457031, "learning_rate": 4.0096551724137936e-07, "loss": 0.163, "step": 276875 }, { "epoch": 2.72, "grad_norm": 6.774941444396973, "learning_rate": 4.005344827586207e-07, "loss": 0.0886, "step": 276900 }, { "epoch": 2.72, "grad_norm": 6.687313079833984, "learning_rate": 4.0010344827586206e-07, "loss": 0.1772, "step": 276925 }, { "epoch": 2.72, "grad_norm": 8.86439037322998, "learning_rate": 3.996724137931035e-07, "loss": 0.0792, "step": 276950 }, { "epoch": 2.72, "grad_norm": 0.0434512235224247, "learning_rate": 3.9924137931034487e-07, "loss": 0.165, "step": 276975 }, { "epoch": 2.72, "grad_norm": 6.101830959320068, "learning_rate": 3.988103448275862e-07, "loss": 0.0776, "step": 277000 }, { "epoch": 2.72, "grad_norm": 2.1193296909332275, "learning_rate": 3.983793103448276e-07, "loss": 0.1812, "step": 277025 }, { "epoch": 2.72, "grad_norm": 5.796305179595947, "learning_rate": 3.97948275862069e-07, "loss": 0.1068, "step": 277050 }, { "epoch": 2.72, "grad_norm": 3.8665976524353027, "learning_rate": 3.975172413793104e-07, "loss": 0.1997, "step": 277075 }, { "epoch": 2.72, "grad_norm": 7.630683898925781, "learning_rate": 3.970862068965518e-07, "loss": 0.1104, "step": 277100 }, { "epoch": 2.72, "grad_norm": 7.164979457855225, "learning_rate": 3.9665517241379313e-07, "loss": 0.1742, "step": 277125 }, { "epoch": 2.72, "grad_norm": 27.933624267578125, "learning_rate": 3.962241379310345e-07, "loss": 0.0789, "step": 277150 }, { "epoch": 2.73, "grad_norm": 8.204562187194824, "learning_rate": 3.9579310344827594e-07, "loss": 0.1925, "step": 277175 }, { "epoch": 2.73, "grad_norm": 9.316349029541016, "learning_rate": 3.953620689655173e-07, "loss": 0.0913, "step": 277200 }, { "epoch": 2.73, "grad_norm": 4.691826820373535, "learning_rate": 3.9493103448275864e-07, "loss": 0.1783, "step": 277225 }, { "epoch": 2.73, "grad_norm": 16.06674575805664, "learning_rate": 3.9450000000000005e-07, "loss": 0.1027, "step": 277250 }, { "epoch": 2.73, "grad_norm": 4.667656421661377, "learning_rate": 3.940689655172414e-07, "loss": 0.2057, "step": 277275 }, { "epoch": 2.73, "grad_norm": 11.737641334533691, "learning_rate": 3.936379310344828e-07, "loss": 0.1005, "step": 277300 }, { "epoch": 2.73, "grad_norm": 4.6072306632995605, "learning_rate": 3.932068965517242e-07, "loss": 0.2091, "step": 277325 }, { "epoch": 2.73, "grad_norm": 12.889288902282715, "learning_rate": 3.9277586206896555e-07, "loss": 0.0947, "step": 277350 }, { "epoch": 2.73, "grad_norm": 2.7624239921569824, "learning_rate": 3.923448275862069e-07, "loss": 0.1663, "step": 277375 }, { "epoch": 2.73, "grad_norm": 11.648672103881836, "learning_rate": 3.9191379310344825e-07, "loss": 0.1041, "step": 277400 }, { "epoch": 2.73, "grad_norm": 2.6101784706115723, "learning_rate": 3.914827586206897e-07, "loss": 0.1794, "step": 277425 }, { "epoch": 2.73, "grad_norm": 11.027528762817383, "learning_rate": 3.9105172413793106e-07, "loss": 0.0989, "step": 277450 }, { "epoch": 2.73, "grad_norm": 4.167839527130127, "learning_rate": 3.9062068965517247e-07, "loss": 0.1459, "step": 277475 }, { "epoch": 2.73, "grad_norm": 12.356023788452148, "learning_rate": 3.901896551724138e-07, "loss": 0.0874, "step": 277500 }, { "epoch": 2.73, "grad_norm": 1.5775467157363892, "learning_rate": 3.8975862068965517e-07, "loss": 0.2074, "step": 277525 }, { "epoch": 2.73, "grad_norm": 6.570518493652344, "learning_rate": 3.893275862068966e-07, "loss": 0.1003, "step": 277550 }, { "epoch": 2.73, "grad_norm": 4.818848609924316, "learning_rate": 3.8889655172413797e-07, "loss": 0.1677, "step": 277575 }, { "epoch": 2.73, "grad_norm": 11.709494590759277, "learning_rate": 3.884655172413793e-07, "loss": 0.0927, "step": 277600 }, { "epoch": 2.73, "grad_norm": 3.4283933639526367, "learning_rate": 3.880344827586207e-07, "loss": 0.152, "step": 277625 }, { "epoch": 2.73, "grad_norm": 17.644901275634766, "learning_rate": 3.8760344827586213e-07, "loss": 0.0826, "step": 277650 }, { "epoch": 2.73, "grad_norm": 5.781620502471924, "learning_rate": 3.871724137931035e-07, "loss": 0.1698, "step": 277675 }, { "epoch": 2.73, "grad_norm": 5.9739484786987305, "learning_rate": 3.867413793103449e-07, "loss": 0.0582, "step": 277700 }, { "epoch": 2.73, "grad_norm": 3.992851734161377, "learning_rate": 3.8631034482758624e-07, "loss": 0.1962, "step": 277725 }, { "epoch": 2.73, "grad_norm": 7.631875991821289, "learning_rate": 3.858793103448276e-07, "loss": 0.0732, "step": 277750 }, { "epoch": 2.73, "grad_norm": 4.331505298614502, "learning_rate": 3.8544827586206904e-07, "loss": 0.1484, "step": 277775 }, { "epoch": 2.73, "grad_norm": 8.779815673828125, "learning_rate": 3.850172413793104e-07, "loss": 0.1277, "step": 277800 }, { "epoch": 2.73, "grad_norm": 2.980710744857788, "learning_rate": 3.8458620689655174e-07, "loss": 0.1471, "step": 277825 }, { "epoch": 2.73, "grad_norm": 23.207536697387695, "learning_rate": 3.841551724137931e-07, "loss": 0.1002, "step": 277850 }, { "epoch": 2.73, "grad_norm": 6.999622344970703, "learning_rate": 3.837241379310345e-07, "loss": 0.1843, "step": 277875 }, { "epoch": 2.73, "grad_norm": 7.348181247711182, "learning_rate": 3.832931034482759e-07, "loss": 0.0803, "step": 277900 }, { "epoch": 2.73, "grad_norm": 5.289138317108154, "learning_rate": 3.828620689655173e-07, "loss": 0.184, "step": 277925 }, { "epoch": 2.73, "grad_norm": 14.279273986816406, "learning_rate": 3.8243103448275866e-07, "loss": 0.0995, "step": 277950 }, { "epoch": 2.73, "grad_norm": 6.901388168334961, "learning_rate": 3.82e-07, "loss": 0.1772, "step": 277975 }, { "epoch": 2.73, "grad_norm": 11.141207695007324, "learning_rate": 3.8156896551724146e-07, "loss": 0.1427, "step": 278000 }, { "epoch": 2.73, "grad_norm": 4.135083198547363, "learning_rate": 3.811379310344828e-07, "loss": 0.1588, "step": 278025 }, { "epoch": 2.73, "grad_norm": 9.590917587280273, "learning_rate": 3.8070689655172416e-07, "loss": 0.0849, "step": 278050 }, { "epoch": 2.73, "grad_norm": 1.0837408304214478, "learning_rate": 3.802758620689655e-07, "loss": 0.233, "step": 278075 }, { "epoch": 2.73, "grad_norm": 9.003360748291016, "learning_rate": 3.798448275862069e-07, "loss": 0.118, "step": 278100 }, { "epoch": 2.73, "grad_norm": 3.120145082473755, "learning_rate": 3.794137931034483e-07, "loss": 0.1624, "step": 278125 }, { "epoch": 2.73, "grad_norm": 18.10856056213379, "learning_rate": 3.789827586206897e-07, "loss": 0.1002, "step": 278150 }, { "epoch": 2.74, "grad_norm": 5.2702860832214355, "learning_rate": 3.785517241379311e-07, "loss": 0.1552, "step": 278175 }, { "epoch": 2.74, "grad_norm": 9.8949556350708, "learning_rate": 3.781206896551724e-07, "loss": 0.0837, "step": 278200 }, { "epoch": 2.74, "grad_norm": 5.640615463256836, "learning_rate": 3.776896551724138e-07, "loss": 0.1656, "step": 278225 }, { "epoch": 2.74, "grad_norm": 9.66185474395752, "learning_rate": 3.7725862068965523e-07, "loss": 0.0896, "step": 278250 }, { "epoch": 2.74, "grad_norm": 3.3036391735076904, "learning_rate": 3.768275862068966e-07, "loss": 0.1548, "step": 278275 }, { "epoch": 2.74, "grad_norm": 9.620711326599121, "learning_rate": 3.7639655172413793e-07, "loss": 0.0831, "step": 278300 }, { "epoch": 2.74, "grad_norm": 5.956900119781494, "learning_rate": 3.7596551724137934e-07, "loss": 0.1688, "step": 278325 }, { "epoch": 2.74, "grad_norm": 10.982216835021973, "learning_rate": 3.755344827586207e-07, "loss": 0.0863, "step": 278350 }, { "epoch": 2.74, "grad_norm": 3.420175313949585, "learning_rate": 3.751206896551724e-07, "loss": 0.1881, "step": 278375 }, { "epoch": 2.74, "grad_norm": 10.786968231201172, "learning_rate": 3.7468965517241387e-07, "loss": 0.087, "step": 278400 }, { "epoch": 2.74, "grad_norm": 4.362918376922607, "learning_rate": 3.742586206896552e-07, "loss": 0.1877, "step": 278425 }, { "epoch": 2.74, "grad_norm": 14.170884132385254, "learning_rate": 3.7382758620689657e-07, "loss": 0.0873, "step": 278450 }, { "epoch": 2.74, "grad_norm": 3.9426956176757812, "learning_rate": 3.733965517241379e-07, "loss": 0.1571, "step": 278475 }, { "epoch": 2.74, "grad_norm": 6.875335216522217, "learning_rate": 3.729655172413794e-07, "loss": 0.116, "step": 278500 }, { "epoch": 2.74, "grad_norm": 5.565607070922852, "learning_rate": 3.7253448275862073e-07, "loss": 0.1921, "step": 278525 }, { "epoch": 2.74, "grad_norm": 4.476992130279541, "learning_rate": 3.721034482758621e-07, "loss": 0.0822, "step": 278550 }, { "epoch": 2.74, "grad_norm": 4.83958101272583, "learning_rate": 3.716724137931035e-07, "loss": 0.1907, "step": 278575 }, { "epoch": 2.74, "grad_norm": 11.67141342163086, "learning_rate": 3.7124137931034483e-07, "loss": 0.1141, "step": 278600 }, { "epoch": 2.74, "grad_norm": 3.670551300048828, "learning_rate": 3.708103448275863e-07, "loss": 0.1532, "step": 278625 }, { "epoch": 2.74, "grad_norm": 9.874048233032227, "learning_rate": 3.7037931034482764e-07, "loss": 0.1009, "step": 278650 }, { "epoch": 2.74, "grad_norm": 3.6269423961639404, "learning_rate": 3.69948275862069e-07, "loss": 0.2089, "step": 278675 }, { "epoch": 2.74, "grad_norm": 7.271410942077637, "learning_rate": 3.6951724137931034e-07, "loss": 0.0911, "step": 278700 }, { "epoch": 2.74, "grad_norm": 3.347024440765381, "learning_rate": 3.6908620689655174e-07, "loss": 0.1852, "step": 278725 }, { "epoch": 2.74, "grad_norm": 11.313100814819336, "learning_rate": 3.6865517241379315e-07, "loss": 0.1035, "step": 278750 }, { "epoch": 2.74, "grad_norm": 3.8553783893585205, "learning_rate": 3.682241379310345e-07, "loss": 0.1733, "step": 278775 }, { "epoch": 2.74, "grad_norm": 10.258349418640137, "learning_rate": 3.677931034482759e-07, "loss": 0.0809, "step": 278800 }, { "epoch": 2.74, "grad_norm": 4.0257792472839355, "learning_rate": 3.6736206896551725e-07, "loss": 0.1573, "step": 278825 }, { "epoch": 2.74, "grad_norm": 7.357805252075195, "learning_rate": 3.669310344827586e-07, "loss": 0.1227, "step": 278850 }, { "epoch": 2.74, "grad_norm": 1.7592202425003052, "learning_rate": 3.6650000000000006e-07, "loss": 0.1768, "step": 278875 }, { "epoch": 2.74, "grad_norm": 16.888240814208984, "learning_rate": 3.660689655172414e-07, "loss": 0.091, "step": 278900 }, { "epoch": 2.74, "grad_norm": 2.765476703643799, "learning_rate": 3.6563793103448276e-07, "loss": 0.1769, "step": 278925 }, { "epoch": 2.74, "grad_norm": 9.839079856872559, "learning_rate": 3.6520689655172416e-07, "loss": 0.089, "step": 278950 }, { "epoch": 2.74, "grad_norm": 1.8844096660614014, "learning_rate": 3.6477586206896557e-07, "loss": 0.1767, "step": 278975 }, { "epoch": 2.74, "grad_norm": 10.86240005493164, "learning_rate": 3.643448275862069e-07, "loss": 0.1013, "step": 279000 }, { "epoch": 2.74, "grad_norm": 4.873654365539551, "learning_rate": 3.639137931034483e-07, "loss": 0.1673, "step": 279025 }, { "epoch": 2.74, "grad_norm": 10.632847785949707, "learning_rate": 3.6348275862068967e-07, "loss": 0.1162, "step": 279050 }, { "epoch": 2.74, "grad_norm": 0.5012978315353394, "learning_rate": 3.63051724137931e-07, "loss": 0.1842, "step": 279075 }, { "epoch": 2.74, "grad_norm": 5.172740936279297, "learning_rate": 3.626206896551725e-07, "loss": 0.0941, "step": 279100 }, { "epoch": 2.74, "grad_norm": 1.8006539344787598, "learning_rate": 3.6218965517241383e-07, "loss": 0.1624, "step": 279125 }, { "epoch": 2.74, "grad_norm": 9.93074893951416, "learning_rate": 3.617586206896552e-07, "loss": 0.0879, "step": 279150 }, { "epoch": 2.74, "grad_norm": 9.91169261932373, "learning_rate": 3.613275862068966e-07, "loss": 0.1583, "step": 279175 }, { "epoch": 2.75, "grad_norm": 5.826085567474365, "learning_rate": 3.6089655172413793e-07, "loss": 0.0921, "step": 279200 }, { "epoch": 2.75, "grad_norm": 7.065873622894287, "learning_rate": 3.6046551724137934e-07, "loss": 0.1368, "step": 279225 }, { "epoch": 2.75, "grad_norm": 8.984013557434082, "learning_rate": 3.6003448275862074e-07, "loss": 0.0851, "step": 279250 }, { "epoch": 2.75, "grad_norm": 7.549295902252197, "learning_rate": 3.596034482758621e-07, "loss": 0.1577, "step": 279275 }, { "epoch": 2.75, "grad_norm": 11.07157039642334, "learning_rate": 3.5917241379310344e-07, "loss": 0.0888, "step": 279300 }, { "epoch": 2.75, "grad_norm": 2.6613290309906006, "learning_rate": 3.587413793103449e-07, "loss": 0.1582, "step": 279325 }, { "epoch": 2.75, "grad_norm": 11.119193077087402, "learning_rate": 3.5831034482758625e-07, "loss": 0.1019, "step": 279350 }, { "epoch": 2.75, "grad_norm": 3.1020915508270264, "learning_rate": 3.578793103448276e-07, "loss": 0.2067, "step": 279375 }, { "epoch": 2.75, "grad_norm": 5.210853099822998, "learning_rate": 3.57448275862069e-07, "loss": 0.1172, "step": 279400 }, { "epoch": 2.75, "grad_norm": 4.074288368225098, "learning_rate": 3.5701724137931035e-07, "loss": 0.1705, "step": 279425 }, { "epoch": 2.75, "grad_norm": 9.655787467956543, "learning_rate": 3.5658620689655176e-07, "loss": 0.1007, "step": 279450 }, { "epoch": 2.75, "grad_norm": 5.519876480102539, "learning_rate": 3.5615517241379316e-07, "loss": 0.2059, "step": 279475 }, { "epoch": 2.75, "grad_norm": 5.200676441192627, "learning_rate": 3.557241379310345e-07, "loss": 0.0943, "step": 279500 }, { "epoch": 2.75, "grad_norm": 2.886744499206543, "learning_rate": 3.5529310344827586e-07, "loss": 0.1601, "step": 279525 }, { "epoch": 2.75, "grad_norm": 8.304437637329102, "learning_rate": 3.5486206896551727e-07, "loss": 0.1228, "step": 279550 }, { "epoch": 2.75, "grad_norm": 6.744257926940918, "learning_rate": 3.5443103448275867e-07, "loss": 0.1718, "step": 279575 }, { "epoch": 2.75, "grad_norm": 10.808964729309082, "learning_rate": 3.54e-07, "loss": 0.1081, "step": 279600 }, { "epoch": 2.75, "grad_norm": 5.179987907409668, "learning_rate": 3.535689655172414e-07, "loss": 0.1603, "step": 279625 }, { "epoch": 2.75, "grad_norm": 8.027647972106934, "learning_rate": 3.531379310344828e-07, "loss": 0.0904, "step": 279650 }, { "epoch": 2.75, "grad_norm": 2.8387577533721924, "learning_rate": 3.527068965517242e-07, "loss": 0.1702, "step": 279675 }, { "epoch": 2.75, "grad_norm": 11.3780517578125, "learning_rate": 3.522758620689656e-07, "loss": 0.08, "step": 279700 }, { "epoch": 2.75, "grad_norm": 4.570481777191162, "learning_rate": 3.5184482758620693e-07, "loss": 0.1626, "step": 279725 }, { "epoch": 2.75, "grad_norm": 16.130210876464844, "learning_rate": 3.514137931034483e-07, "loss": 0.0785, "step": 279750 }, { "epoch": 2.75, "grad_norm": 16.769628524780273, "learning_rate": 3.5098275862068963e-07, "loss": 0.1582, "step": 279775 }, { "epoch": 2.75, "grad_norm": 15.615890502929688, "learning_rate": 3.505517241379311e-07, "loss": 0.0857, "step": 279800 }, { "epoch": 2.75, "grad_norm": 3.067751169204712, "learning_rate": 3.5012068965517244e-07, "loss": 0.1622, "step": 279825 }, { "epoch": 2.75, "grad_norm": 8.3156156539917, "learning_rate": 3.4968965517241384e-07, "loss": 0.1164, "step": 279850 }, { "epoch": 2.75, "grad_norm": 2.050029993057251, "learning_rate": 3.492586206896552e-07, "loss": 0.2123, "step": 279875 }, { "epoch": 2.75, "grad_norm": 10.492119789123535, "learning_rate": 3.4882758620689654e-07, "loss": 0.1015, "step": 279900 }, { "epoch": 2.75, "grad_norm": 6.03339958190918, "learning_rate": 3.48396551724138e-07, "loss": 0.1772, "step": 279925 }, { "epoch": 2.75, "grad_norm": 8.508543968200684, "learning_rate": 3.4796551724137935e-07, "loss": 0.1037, "step": 279950 }, { "epoch": 2.75, "grad_norm": 5.522085189819336, "learning_rate": 3.475344827586207e-07, "loss": 0.1552, "step": 279975 }, { "epoch": 2.75, "grad_norm": 16.19511604309082, "learning_rate": 3.4710344827586205e-07, "loss": 0.0988, "step": 280000 }, { "epoch": 2.75, "eval_loss": 0.6423006057739258, "eval_runtime": 5974.4526, "eval_samples_per_second": 1.585, "eval_steps_per_second": 0.198, "eval_wer": 0.11515805060194424, "step": 280000 }, { "epoch": 2.75, "grad_norm": 6.357030868530273, "learning_rate": 3.4667241379310346e-07, "loss": 0.1813, "step": 280025 }, { "epoch": 2.75, "grad_norm": 9.563237190246582, "learning_rate": 3.4624137931034486e-07, "loss": 0.1252, "step": 280050 }, { "epoch": 2.75, "grad_norm": 4.896879196166992, "learning_rate": 3.4581034482758626e-07, "loss": 0.1837, "step": 280075 }, { "epoch": 2.75, "grad_norm": 5.317733287811279, "learning_rate": 3.453793103448276e-07, "loss": 0.0866, "step": 280100 }, { "epoch": 2.75, "grad_norm": 4.701028823852539, "learning_rate": 3.4494827586206896e-07, "loss": 0.1645, "step": 280125 }, { "epoch": 2.75, "grad_norm": 12.929760932922363, "learning_rate": 3.445172413793104e-07, "loss": 0.073, "step": 280150 }, { "epoch": 2.75, "grad_norm": 2.6195878982543945, "learning_rate": 3.4408620689655177e-07, "loss": 0.1888, "step": 280175 }, { "epoch": 2.75, "grad_norm": 10.406696319580078, "learning_rate": 3.436551724137931e-07, "loss": 0.089, "step": 280200 }, { "epoch": 2.76, "grad_norm": 4.2538838386535645, "learning_rate": 3.4322413793103447e-07, "loss": 0.1814, "step": 280225 }, { "epoch": 2.76, "grad_norm": 14.983543395996094, "learning_rate": 3.427931034482759e-07, "loss": 0.1109, "step": 280250 }, { "epoch": 2.76, "grad_norm": 3.3303582668304443, "learning_rate": 3.423620689655173e-07, "loss": 0.1667, "step": 280275 }, { "epoch": 2.76, "grad_norm": 7.0744452476501465, "learning_rate": 3.419310344827587e-07, "loss": 0.1066, "step": 280300 }, { "epoch": 2.76, "grad_norm": 1.8993303775787354, "learning_rate": 3.4150000000000003e-07, "loss": 0.1409, "step": 280325 }, { "epoch": 2.76, "grad_norm": 17.607606887817383, "learning_rate": 3.410689655172414e-07, "loss": 0.0904, "step": 280350 }, { "epoch": 2.76, "grad_norm": 3.257359504699707, "learning_rate": 3.4063793103448273e-07, "loss": 0.2062, "step": 280375 }, { "epoch": 2.76, "grad_norm": 14.752781867980957, "learning_rate": 3.402068965517242e-07, "loss": 0.1331, "step": 280400 }, { "epoch": 2.76, "grad_norm": 4.423745155334473, "learning_rate": 3.3977586206896554e-07, "loss": 0.1653, "step": 280425 }, { "epoch": 2.76, "grad_norm": 14.90488052368164, "learning_rate": 3.393448275862069e-07, "loss": 0.1087, "step": 280450 }, { "epoch": 2.76, "grad_norm": 7.712477207183838, "learning_rate": 3.3893103448275867e-07, "loss": 0.2234, "step": 280475 }, { "epoch": 2.76, "grad_norm": 16.925975799560547, "learning_rate": 3.385e-07, "loss": 0.1155, "step": 280500 }, { "epoch": 2.76, "grad_norm": 3.5703911781311035, "learning_rate": 3.3806896551724137e-07, "loss": 0.1734, "step": 280525 }, { "epoch": 2.76, "grad_norm": 9.38541030883789, "learning_rate": 3.3763793103448283e-07, "loss": 0.1159, "step": 280550 }, { "epoch": 2.76, "grad_norm": 3.1767027378082275, "learning_rate": 3.372068965517242e-07, "loss": 0.1664, "step": 280575 }, { "epoch": 2.76, "grad_norm": 3.7612009048461914, "learning_rate": 3.3677586206896553e-07, "loss": 0.1065, "step": 280600 }, { "epoch": 2.76, "grad_norm": 0.07298213243484497, "learning_rate": 3.363448275862069e-07, "loss": 0.176, "step": 280625 }, { "epoch": 2.76, "grad_norm": 7.6567702293396, "learning_rate": 3.3591379310344834e-07, "loss": 0.0697, "step": 280650 }, { "epoch": 2.76, "grad_norm": 5.838240146636963, "learning_rate": 3.354827586206897e-07, "loss": 0.1602, "step": 280675 }, { "epoch": 2.76, "grad_norm": 8.2628755569458, "learning_rate": 3.350517241379311e-07, "loss": 0.0733, "step": 280700 }, { "epoch": 2.76, "grad_norm": 2.2130463123321533, "learning_rate": 3.3462068965517244e-07, "loss": 0.1864, "step": 280725 }, { "epoch": 2.76, "grad_norm": 17.56263542175293, "learning_rate": 3.341896551724138e-07, "loss": 0.096, "step": 280750 }, { "epoch": 2.76, "grad_norm": 4.824863910675049, "learning_rate": 3.3375862068965525e-07, "loss": 0.1798, "step": 280775 }, { "epoch": 2.76, "grad_norm": 17.732751846313477, "learning_rate": 3.333275862068966e-07, "loss": 0.0931, "step": 280800 }, { "epoch": 2.76, "grad_norm": 4.536694526672363, "learning_rate": 3.3289655172413795e-07, "loss": 0.145, "step": 280825 }, { "epoch": 2.76, "grad_norm": 11.066266059875488, "learning_rate": 3.324655172413793e-07, "loss": 0.1112, "step": 280850 }, { "epoch": 2.76, "grad_norm": 3.2387547492980957, "learning_rate": 3.320344827586207e-07, "loss": 0.1774, "step": 280875 }, { "epoch": 2.76, "grad_norm": 11.63093376159668, "learning_rate": 3.316034482758621e-07, "loss": 0.0938, "step": 280900 }, { "epoch": 2.76, "grad_norm": 1.156498908996582, "learning_rate": 3.311724137931035e-07, "loss": 0.1631, "step": 280925 }, { "epoch": 2.76, "grad_norm": 9.624261856079102, "learning_rate": 3.3074137931034486e-07, "loss": 0.0883, "step": 280950 }, { "epoch": 2.76, "grad_norm": 5.346285343170166, "learning_rate": 3.303103448275862e-07, "loss": 0.1536, "step": 280975 }, { "epoch": 2.76, "grad_norm": 8.992032051086426, "learning_rate": 3.2987931034482767e-07, "loss": 0.1143, "step": 281000 }, { "epoch": 2.76, "grad_norm": 5.06273078918457, "learning_rate": 3.29448275862069e-07, "loss": 0.155, "step": 281025 }, { "epoch": 2.76, "grad_norm": 8.97339916229248, "learning_rate": 3.2901724137931037e-07, "loss": 0.0721, "step": 281050 }, { "epoch": 2.76, "grad_norm": 3.886516571044922, "learning_rate": 3.285862068965517e-07, "loss": 0.1974, "step": 281075 }, { "epoch": 2.76, "grad_norm": 1.4327234029769897, "learning_rate": 3.281551724137931e-07, "loss": 0.0977, "step": 281100 }, { "epoch": 2.76, "grad_norm": 2.085916042327881, "learning_rate": 3.277241379310345e-07, "loss": 0.1649, "step": 281125 }, { "epoch": 2.76, "grad_norm": 5.490955352783203, "learning_rate": 3.2729310344827593e-07, "loss": 0.0954, "step": 281150 }, { "epoch": 2.76, "grad_norm": 4.250877857208252, "learning_rate": 3.268620689655173e-07, "loss": 0.1515, "step": 281175 }, { "epoch": 2.76, "grad_norm": 11.385536193847656, "learning_rate": 3.2643103448275863e-07, "loss": 0.1289, "step": 281200 }, { "epoch": 2.77, "grad_norm": 8.712634086608887, "learning_rate": 3.26e-07, "loss": 0.16, "step": 281225 }, { "epoch": 2.77, "grad_norm": 9.144512176513672, "learning_rate": 3.2556896551724144e-07, "loss": 0.0634, "step": 281250 }, { "epoch": 2.77, "grad_norm": 2.0186290740966797, "learning_rate": 3.251379310344828e-07, "loss": 0.1559, "step": 281275 }, { "epoch": 2.77, "grad_norm": 11.091713905334473, "learning_rate": 3.2470689655172414e-07, "loss": 0.1263, "step": 281300 }, { "epoch": 2.77, "grad_norm": 2.412539005279541, "learning_rate": 3.2427586206896554e-07, "loss": 0.1755, "step": 281325 }, { "epoch": 2.77, "grad_norm": 9.137068748474121, "learning_rate": 3.238448275862069e-07, "loss": 0.091, "step": 281350 }, { "epoch": 2.77, "grad_norm": 1.027549147605896, "learning_rate": 3.2341379310344835e-07, "loss": 0.1866, "step": 281375 }, { "epoch": 2.77, "grad_norm": 8.251898765563965, "learning_rate": 3.229827586206897e-07, "loss": 0.0948, "step": 281400 }, { "epoch": 2.77, "grad_norm": 1.3454667329788208, "learning_rate": 3.2255172413793105e-07, "loss": 0.2203, "step": 281425 }, { "epoch": 2.77, "grad_norm": 13.234665870666504, "learning_rate": 3.221206896551724e-07, "loss": 0.0846, "step": 281450 }, { "epoch": 2.77, "grad_norm": 7.707387924194336, "learning_rate": 3.2168965517241386e-07, "loss": 0.1659, "step": 281475 }, { "epoch": 2.77, "grad_norm": 10.461577415466309, "learning_rate": 3.212586206896552e-07, "loss": 0.0836, "step": 281500 }, { "epoch": 2.77, "grad_norm": 7.333326816558838, "learning_rate": 3.2082758620689656e-07, "loss": 0.1533, "step": 281525 }, { "epoch": 2.77, "grad_norm": 7.045411109924316, "learning_rate": 3.2039655172413796e-07, "loss": 0.0803, "step": 281550 }, { "epoch": 2.77, "grad_norm": 3.351952314376831, "learning_rate": 3.199655172413793e-07, "loss": 0.1776, "step": 281575 }, { "epoch": 2.77, "grad_norm": 13.337319374084473, "learning_rate": 3.1953448275862077e-07, "loss": 0.1164, "step": 281600 }, { "epoch": 2.77, "grad_norm": 6.227311611175537, "learning_rate": 3.191034482758621e-07, "loss": 0.162, "step": 281625 }, { "epoch": 2.77, "grad_norm": 15.151639938354492, "learning_rate": 3.1867241379310347e-07, "loss": 0.0975, "step": 281650 }, { "epoch": 2.77, "grad_norm": 5.659428596496582, "learning_rate": 3.182413793103448e-07, "loss": 0.1888, "step": 281675 }, { "epoch": 2.77, "grad_norm": 10.801962852478027, "learning_rate": 3.178103448275862e-07, "loss": 0.1308, "step": 281700 }, { "epoch": 2.77, "grad_norm": 4.0070881843566895, "learning_rate": 3.1737931034482763e-07, "loss": 0.1819, "step": 281725 }, { "epoch": 2.77, "grad_norm": 7.502625465393066, "learning_rate": 3.16948275862069e-07, "loss": 0.0914, "step": 281750 }, { "epoch": 2.77, "grad_norm": 8.443320274353027, "learning_rate": 3.165172413793104e-07, "loss": 0.1648, "step": 281775 }, { "epoch": 2.77, "grad_norm": 8.029959678649902, "learning_rate": 3.1608620689655173e-07, "loss": 0.0861, "step": 281800 }, { "epoch": 2.77, "grad_norm": 6.618780612945557, "learning_rate": 3.1565517241379314e-07, "loss": 0.1683, "step": 281825 }, { "epoch": 2.77, "grad_norm": 6.993978023529053, "learning_rate": 3.1522413793103454e-07, "loss": 0.0703, "step": 281850 }, { "epoch": 2.77, "grad_norm": 5.003042221069336, "learning_rate": 3.147931034482759e-07, "loss": 0.1753, "step": 281875 }, { "epoch": 2.77, "grad_norm": 10.395920753479004, "learning_rate": 3.1436206896551724e-07, "loss": 0.0804, "step": 281900 }, { "epoch": 2.77, "grad_norm": 5.402153968811035, "learning_rate": 3.1393103448275864e-07, "loss": 0.1585, "step": 281925 }, { "epoch": 2.77, "grad_norm": 15.301591873168945, "learning_rate": 3.1350000000000005e-07, "loss": 0.099, "step": 281950 }, { "epoch": 2.77, "grad_norm": 0.11544325947761536, "learning_rate": 3.130689655172414e-07, "loss": 0.1595, "step": 281975 }, { "epoch": 2.77, "grad_norm": 18.296178817749023, "learning_rate": 3.126379310344828e-07, "loss": 0.0943, "step": 282000 }, { "epoch": 2.77, "grad_norm": 2.627274990081787, "learning_rate": 3.1220689655172415e-07, "loss": 0.1673, "step": 282025 }, { "epoch": 2.77, "grad_norm": 7.933919906616211, "learning_rate": 3.1177586206896556e-07, "loss": 0.0897, "step": 282050 }, { "epoch": 2.77, "grad_norm": 4.4856791496276855, "learning_rate": 3.113448275862069e-07, "loss": 0.1752, "step": 282075 }, { "epoch": 2.77, "grad_norm": 9.457770347595215, "learning_rate": 3.109137931034483e-07, "loss": 0.0922, "step": 282100 }, { "epoch": 2.77, "grad_norm": 4.146698951721191, "learning_rate": 3.1048275862068966e-07, "loss": 0.1861, "step": 282125 }, { "epoch": 2.77, "grad_norm": 14.22641658782959, "learning_rate": 3.1005172413793106e-07, "loss": 0.1062, "step": 282150 }, { "epoch": 2.77, "grad_norm": 4.655223369598389, "learning_rate": 3.096206896551724e-07, "loss": 0.1663, "step": 282175 }, { "epoch": 2.77, "grad_norm": 8.492791175842285, "learning_rate": 3.091896551724138e-07, "loss": 0.0594, "step": 282200 }, { "epoch": 2.77, "grad_norm": 1.886788249015808, "learning_rate": 3.087586206896552e-07, "loss": 0.1471, "step": 282225 }, { "epoch": 2.78, "grad_norm": 13.498649597167969, "learning_rate": 3.0832758620689657e-07, "loss": 0.0853, "step": 282250 }, { "epoch": 2.78, "grad_norm": 5.179561138153076, "learning_rate": 3.07896551724138e-07, "loss": 0.1288, "step": 282275 }, { "epoch": 2.78, "grad_norm": 14.08488655090332, "learning_rate": 3.074655172413793e-07, "loss": 0.0888, "step": 282300 }, { "epoch": 2.78, "grad_norm": 0.8033860325813293, "learning_rate": 3.0703448275862073e-07, "loss": 0.1637, "step": 282325 }, { "epoch": 2.78, "grad_norm": 11.099486351013184, "learning_rate": 3.066034482758621e-07, "loss": 0.1243, "step": 282350 }, { "epoch": 2.78, "grad_norm": 6.301819324493408, "learning_rate": 3.061724137931035e-07, "loss": 0.1713, "step": 282375 }, { "epoch": 2.78, "grad_norm": 11.325738906860352, "learning_rate": 3.0574137931034483e-07, "loss": 0.0887, "step": 282400 }, { "epoch": 2.78, "grad_norm": 4.0661725997924805, "learning_rate": 3.0531034482758624e-07, "loss": 0.1606, "step": 282425 }, { "epoch": 2.78, "grad_norm": 14.045316696166992, "learning_rate": 3.0487931034482764e-07, "loss": 0.119, "step": 282450 }, { "epoch": 2.78, "grad_norm": 3.0811104774475098, "learning_rate": 3.04448275862069e-07, "loss": 0.174, "step": 282475 }, { "epoch": 2.78, "grad_norm": 6.777609348297119, "learning_rate": 3.040172413793104e-07, "loss": 0.0898, "step": 282500 }, { "epoch": 2.78, "grad_norm": 4.165214538574219, "learning_rate": 3.0358620689655175e-07, "loss": 0.1631, "step": 282525 }, { "epoch": 2.78, "grad_norm": 8.5382661819458, "learning_rate": 3.031551724137931e-07, "loss": 0.1131, "step": 282550 }, { "epoch": 2.78, "grad_norm": 6.594174861907959, "learning_rate": 3.027413793103448e-07, "loss": 0.2124, "step": 282575 }, { "epoch": 2.78, "grad_norm": 11.920580863952637, "learning_rate": 3.023103448275862e-07, "loss": 0.0934, "step": 282600 }, { "epoch": 2.78, "grad_norm": 6.292272090911865, "learning_rate": 3.0187931034482763e-07, "loss": 0.1542, "step": 282625 }, { "epoch": 2.78, "grad_norm": 10.353873252868652, "learning_rate": 3.01448275862069e-07, "loss": 0.0859, "step": 282650 }, { "epoch": 2.78, "grad_norm": 2.7369251251220703, "learning_rate": 3.010172413793104e-07, "loss": 0.1707, "step": 282675 }, { "epoch": 2.78, "grad_norm": 14.107666015625, "learning_rate": 3.0058620689655173e-07, "loss": 0.0761, "step": 282700 }, { "epoch": 2.78, "grad_norm": 2.858278751373291, "learning_rate": 3.0015517241379314e-07, "loss": 0.1918, "step": 282725 }, { "epoch": 2.78, "grad_norm": 13.607640266418457, "learning_rate": 2.997241379310345e-07, "loss": 0.1009, "step": 282750 }, { "epoch": 2.78, "grad_norm": 1.3221958875656128, "learning_rate": 2.992931034482759e-07, "loss": 0.202, "step": 282775 }, { "epoch": 2.78, "grad_norm": 5.247147560119629, "learning_rate": 2.9886206896551724e-07, "loss": 0.1022, "step": 282800 }, { "epoch": 2.78, "grad_norm": 3.0817365646362305, "learning_rate": 2.9843103448275864e-07, "loss": 0.1637, "step": 282825 }, { "epoch": 2.78, "grad_norm": 10.68554973602295, "learning_rate": 2.9800000000000005e-07, "loss": 0.0893, "step": 282850 }, { "epoch": 2.78, "grad_norm": 4.38458251953125, "learning_rate": 2.975689655172414e-07, "loss": 0.1583, "step": 282875 }, { "epoch": 2.78, "grad_norm": 10.023724555969238, "learning_rate": 2.971379310344828e-07, "loss": 0.0786, "step": 282900 }, { "epoch": 2.78, "grad_norm": 4.189007759094238, "learning_rate": 2.9670689655172415e-07, "loss": 0.1772, "step": 282925 }, { "epoch": 2.78, "grad_norm": 13.151372909545898, "learning_rate": 2.9627586206896556e-07, "loss": 0.1153, "step": 282950 }, { "epoch": 2.78, "grad_norm": 5.062343120574951, "learning_rate": 2.958448275862069e-07, "loss": 0.1719, "step": 282975 }, { "epoch": 2.78, "grad_norm": 10.372514724731445, "learning_rate": 2.954137931034483e-07, "loss": 0.0896, "step": 283000 }, { "epoch": 2.78, "grad_norm": 2.493013381958008, "learning_rate": 2.9498275862068966e-07, "loss": 0.1276, "step": 283025 }, { "epoch": 2.78, "grad_norm": 7.1444315910339355, "learning_rate": 2.9455172413793106e-07, "loss": 0.0859, "step": 283050 }, { "epoch": 2.78, "grad_norm": 3.311044692993164, "learning_rate": 2.9412068965517247e-07, "loss": 0.1979, "step": 283075 }, { "epoch": 2.78, "grad_norm": 8.607746124267578, "learning_rate": 2.936896551724138e-07, "loss": 0.1081, "step": 283100 }, { "epoch": 2.78, "grad_norm": 3.82440447807312, "learning_rate": 2.932586206896552e-07, "loss": 0.1593, "step": 283125 }, { "epoch": 2.78, "grad_norm": 18.521595001220703, "learning_rate": 2.9282758620689657e-07, "loss": 0.106, "step": 283150 }, { "epoch": 2.78, "grad_norm": 4.254894256591797, "learning_rate": 2.923965517241379e-07, "loss": 0.1919, "step": 283175 }, { "epoch": 2.78, "grad_norm": 7.8832902908325195, "learning_rate": 2.9196551724137933e-07, "loss": 0.0874, "step": 283200 }, { "epoch": 2.78, "grad_norm": 6.285760879516602, "learning_rate": 2.915344827586207e-07, "loss": 0.1774, "step": 283225 }, { "epoch": 2.78, "grad_norm": 7.354010581970215, "learning_rate": 2.911034482758621e-07, "loss": 0.099, "step": 283250 }, { "epoch": 2.79, "grad_norm": 2.2052948474884033, "learning_rate": 2.906724137931035e-07, "loss": 0.2195, "step": 283275 }, { "epoch": 2.79, "grad_norm": 7.1012773513793945, "learning_rate": 2.902413793103449e-07, "loss": 0.1166, "step": 283300 }, { "epoch": 2.79, "grad_norm": 13.262579917907715, "learning_rate": 2.8981034482758624e-07, "loss": 0.1767, "step": 283325 }, { "epoch": 2.79, "grad_norm": 10.589295387268066, "learning_rate": 2.893793103448276e-07, "loss": 0.1087, "step": 283350 }, { "epoch": 2.79, "grad_norm": 4.1168413162231445, "learning_rate": 2.88948275862069e-07, "loss": 0.2022, "step": 283375 }, { "epoch": 2.79, "grad_norm": 6.6697869300842285, "learning_rate": 2.8851724137931034e-07, "loss": 0.0627, "step": 283400 }, { "epoch": 2.79, "grad_norm": 8.780108451843262, "learning_rate": 2.8808620689655175e-07, "loss": 0.2214, "step": 283425 }, { "epoch": 2.79, "grad_norm": 10.791183471679688, "learning_rate": 2.876551724137931e-07, "loss": 0.1095, "step": 283450 }, { "epoch": 2.79, "grad_norm": 0.5020946860313416, "learning_rate": 2.872241379310345e-07, "loss": 0.1885, "step": 283475 }, { "epoch": 2.79, "grad_norm": 7.997142791748047, "learning_rate": 2.867931034482759e-07, "loss": 0.0777, "step": 283500 }, { "epoch": 2.79, "grad_norm": 3.668175220489502, "learning_rate": 2.8636206896551725e-07, "loss": 0.1644, "step": 283525 }, { "epoch": 2.79, "grad_norm": 19.275423049926758, "learning_rate": 2.8593103448275866e-07, "loss": 0.089, "step": 283550 }, { "epoch": 2.79, "grad_norm": 1.3879140615463257, "learning_rate": 2.855e-07, "loss": 0.1388, "step": 283575 }, { "epoch": 2.79, "grad_norm": 11.652441024780273, "learning_rate": 2.850689655172414e-07, "loss": 0.1139, "step": 283600 }, { "epoch": 2.79, "grad_norm": 4.397675514221191, "learning_rate": 2.8463793103448276e-07, "loss": 0.1635, "step": 283625 }, { "epoch": 2.79, "grad_norm": 15.193754196166992, "learning_rate": 2.8420689655172417e-07, "loss": 0.0894, "step": 283650 }, { "epoch": 2.79, "grad_norm": 8.275984764099121, "learning_rate": 2.837758620689655e-07, "loss": 0.1529, "step": 283675 }, { "epoch": 2.79, "grad_norm": 11.360053062438965, "learning_rate": 2.833448275862069e-07, "loss": 0.1049, "step": 283700 }, { "epoch": 2.79, "grad_norm": 5.406158924102783, "learning_rate": 2.829137931034483e-07, "loss": 0.1616, "step": 283725 }, { "epoch": 2.79, "grad_norm": 7.265176773071289, "learning_rate": 2.824827586206897e-07, "loss": 0.0701, "step": 283750 }, { "epoch": 2.79, "grad_norm": 1.2285910844802856, "learning_rate": 2.820517241379311e-07, "loss": 0.1946, "step": 283775 }, { "epoch": 2.79, "grad_norm": 10.174477577209473, "learning_rate": 2.8162068965517243e-07, "loss": 0.0812, "step": 283800 }, { "epoch": 2.79, "grad_norm": 5.358548164367676, "learning_rate": 2.8118965517241383e-07, "loss": 0.196, "step": 283825 }, { "epoch": 2.79, "grad_norm": 8.631458282470703, "learning_rate": 2.807586206896552e-07, "loss": 0.0779, "step": 283850 }, { "epoch": 2.79, "grad_norm": 3.2280635833740234, "learning_rate": 2.803275862068966e-07, "loss": 0.22, "step": 283875 }, { "epoch": 2.79, "grad_norm": 4.749826908111572, "learning_rate": 2.7989655172413794e-07, "loss": 0.1235, "step": 283900 }, { "epoch": 2.79, "grad_norm": 1.4346702098846436, "learning_rate": 2.7946551724137934e-07, "loss": 0.1919, "step": 283925 }, { "epoch": 2.79, "grad_norm": 14.900646209716797, "learning_rate": 2.7903448275862074e-07, "loss": 0.1345, "step": 283950 }, { "epoch": 2.79, "grad_norm": 6.225987434387207, "learning_rate": 2.786034482758621e-07, "loss": 0.1855, "step": 283975 }, { "epoch": 2.79, "grad_norm": 7.848743915557861, "learning_rate": 2.781724137931035e-07, "loss": 0.114, "step": 284000 }, { "epoch": 2.79, "grad_norm": 6.667684555053711, "learning_rate": 2.7774137931034485e-07, "loss": 0.1697, "step": 284025 }, { "epoch": 2.79, "grad_norm": 9.930975914001465, "learning_rate": 2.773103448275862e-07, "loss": 0.0992, "step": 284050 }, { "epoch": 2.79, "grad_norm": 4.185628890991211, "learning_rate": 2.768793103448276e-07, "loss": 0.1676, "step": 284075 }, { "epoch": 2.79, "grad_norm": 14.907809257507324, "learning_rate": 2.76448275862069e-07, "loss": 0.0946, "step": 284100 }, { "epoch": 2.79, "grad_norm": 4.192559242248535, "learning_rate": 2.7601724137931036e-07, "loss": 0.1639, "step": 284125 }, { "epoch": 2.79, "grad_norm": 10.439252853393555, "learning_rate": 2.7558620689655176e-07, "loss": 0.0964, "step": 284150 }, { "epoch": 2.79, "grad_norm": 3.061068534851074, "learning_rate": 2.751551724137931e-07, "loss": 0.1608, "step": 284175 }, { "epoch": 2.79, "grad_norm": 16.60641098022461, "learning_rate": 2.747241379310345e-07, "loss": 0.0942, "step": 284200 }, { "epoch": 2.79, "grad_norm": 6.136343955993652, "learning_rate": 2.7429310344827586e-07, "loss": 0.1931, "step": 284225 }, { "epoch": 2.79, "grad_norm": 16.61565399169922, "learning_rate": 2.7386206896551727e-07, "loss": 0.1003, "step": 284250 }, { "epoch": 2.8, "grad_norm": 5.631192207336426, "learning_rate": 2.734310344827586e-07, "loss": 0.185, "step": 284275 }, { "epoch": 2.8, "grad_norm": 13.93435001373291, "learning_rate": 2.73e-07, "loss": 0.0943, "step": 284300 }, { "epoch": 2.8, "grad_norm": 6.7478556632995605, "learning_rate": 2.725689655172414e-07, "loss": 0.1503, "step": 284325 }, { "epoch": 2.8, "grad_norm": 5.849349021911621, "learning_rate": 2.721379310344828e-07, "loss": 0.0854, "step": 284350 }, { "epoch": 2.8, "grad_norm": 4.286627769470215, "learning_rate": 2.717068965517242e-07, "loss": 0.1808, "step": 284375 }, { "epoch": 2.8, "grad_norm": 13.558882713317871, "learning_rate": 2.7127586206896553e-07, "loss": 0.1241, "step": 284400 }, { "epoch": 2.8, "grad_norm": 4.221063613891602, "learning_rate": 2.7084482758620693e-07, "loss": 0.1888, "step": 284425 }, { "epoch": 2.8, "grad_norm": 7.106612682342529, "learning_rate": 2.704137931034483e-07, "loss": 0.1088, "step": 284450 }, { "epoch": 2.8, "grad_norm": 3.6651463508605957, "learning_rate": 2.699827586206897e-07, "loss": 0.1496, "step": 284475 }, { "epoch": 2.8, "grad_norm": 8.703900337219238, "learning_rate": 2.6955172413793104e-07, "loss": 0.0844, "step": 284500 }, { "epoch": 2.8, "grad_norm": 2.225285053253174, "learning_rate": 2.6912068965517244e-07, "loss": 0.1461, "step": 284525 }, { "epoch": 2.8, "grad_norm": 7.88032865524292, "learning_rate": 2.6868965517241385e-07, "loss": 0.0934, "step": 284550 }, { "epoch": 2.8, "grad_norm": 4.605072975158691, "learning_rate": 2.682586206896552e-07, "loss": 0.1491, "step": 284575 }, { "epoch": 2.8, "grad_norm": 14.299753189086914, "learning_rate": 2.678275862068966e-07, "loss": 0.0853, "step": 284600 }, { "epoch": 2.8, "grad_norm": 6.500218391418457, "learning_rate": 2.6739655172413795e-07, "loss": 0.1575, "step": 284625 }, { "epoch": 2.8, "grad_norm": 8.72325611114502, "learning_rate": 2.6696551724137935e-07, "loss": 0.081, "step": 284650 }, { "epoch": 2.8, "grad_norm": 2.044847011566162, "learning_rate": 2.665344827586207e-07, "loss": 0.188, "step": 284675 }, { "epoch": 2.8, "grad_norm": 30.278505325317383, "learning_rate": 2.6610344827586206e-07, "loss": 0.1076, "step": 284700 }, { "epoch": 2.8, "grad_norm": 2.2099342346191406, "learning_rate": 2.656896551724138e-07, "loss": 0.1718, "step": 284725 }, { "epoch": 2.8, "grad_norm": 8.442510604858398, "learning_rate": 2.652586206896552e-07, "loss": 0.0869, "step": 284750 }, { "epoch": 2.8, "grad_norm": 3.8459529876708984, "learning_rate": 2.648275862068966e-07, "loss": 0.1704, "step": 284775 }, { "epoch": 2.8, "grad_norm": 9.790176391601562, "learning_rate": 2.64396551724138e-07, "loss": 0.1112, "step": 284800 }, { "epoch": 2.8, "grad_norm": 3.2668559551239014, "learning_rate": 2.6396551724137934e-07, "loss": 0.1766, "step": 284825 }, { "epoch": 2.8, "grad_norm": 12.858515739440918, "learning_rate": 2.635344827586207e-07, "loss": 0.0993, "step": 284850 }, { "epoch": 2.8, "grad_norm": 4.270209312438965, "learning_rate": 2.631034482758621e-07, "loss": 0.2072, "step": 284875 }, { "epoch": 2.8, "grad_norm": 4.4361138343811035, "learning_rate": 2.6267241379310345e-07, "loss": 0.0901, "step": 284900 }, { "epoch": 2.8, "grad_norm": 1.438123345375061, "learning_rate": 2.6224137931034485e-07, "loss": 0.18, "step": 284925 }, { "epoch": 2.8, "grad_norm": 7.9575676918029785, "learning_rate": 2.618103448275862e-07, "loss": 0.1073, "step": 284950 }, { "epoch": 2.8, "grad_norm": 5.090607643127441, "learning_rate": 2.613793103448276e-07, "loss": 0.1672, "step": 284975 }, { "epoch": 2.8, "grad_norm": 10.039039611816406, "learning_rate": 2.60948275862069e-07, "loss": 0.0862, "step": 285000 }, { "epoch": 2.8, "grad_norm": 6.783121585845947, "learning_rate": 2.6051724137931036e-07, "loss": 0.2064, "step": 285025 }, { "epoch": 2.8, "grad_norm": 3.188359498977661, "learning_rate": 2.6008620689655176e-07, "loss": 0.0808, "step": 285050 }, { "epoch": 2.8, "grad_norm": 2.533033847808838, "learning_rate": 2.596551724137931e-07, "loss": 0.1339, "step": 285075 }, { "epoch": 2.8, "grad_norm": 9.851670265197754, "learning_rate": 2.592241379310345e-07, "loss": 0.0946, "step": 285100 }, { "epoch": 2.8, "grad_norm": 0.0972537025809288, "learning_rate": 2.5879310344827587e-07, "loss": 0.1826, "step": 285125 }, { "epoch": 2.8, "grad_norm": 11.479286193847656, "learning_rate": 2.5836206896551727e-07, "loss": 0.0908, "step": 285150 }, { "epoch": 2.8, "grad_norm": 6.284533977508545, "learning_rate": 2.579310344827586e-07, "loss": 0.2066, "step": 285175 }, { "epoch": 2.8, "grad_norm": 4.252290725708008, "learning_rate": 2.575e-07, "loss": 0.1006, "step": 285200 }, { "epoch": 2.8, "grad_norm": 3.2464563846588135, "learning_rate": 2.570689655172414e-07, "loss": 0.1475, "step": 285225 }, { "epoch": 2.8, "grad_norm": 14.04065227508545, "learning_rate": 2.566379310344828e-07, "loss": 0.1142, "step": 285250 }, { "epoch": 2.8, "grad_norm": 7.13704776763916, "learning_rate": 2.562068965517242e-07, "loss": 0.1562, "step": 285275 }, { "epoch": 2.81, "grad_norm": 8.597448348999023, "learning_rate": 2.5577586206896553e-07, "loss": 0.0863, "step": 285300 }, { "epoch": 2.81, "grad_norm": 5.040580749511719, "learning_rate": 2.5534482758620693e-07, "loss": 0.1741, "step": 285325 }, { "epoch": 2.81, "grad_norm": 12.818329811096191, "learning_rate": 2.549137931034483e-07, "loss": 0.08, "step": 285350 }, { "epoch": 2.81, "grad_norm": 3.7283077239990234, "learning_rate": 2.544827586206897e-07, "loss": 0.1916, "step": 285375 }, { "epoch": 2.81, "grad_norm": 4.713132381439209, "learning_rate": 2.5405172413793104e-07, "loss": 0.0731, "step": 285400 }, { "epoch": 2.81, "grad_norm": 5.571643829345703, "learning_rate": 2.5362068965517244e-07, "loss": 0.197, "step": 285425 }, { "epoch": 2.81, "grad_norm": 10.42041301727295, "learning_rate": 2.5318965517241385e-07, "loss": 0.1096, "step": 285450 }, { "epoch": 2.81, "grad_norm": 0.6206874251365662, "learning_rate": 2.527586206896552e-07, "loss": 0.1778, "step": 285475 }, { "epoch": 2.81, "grad_norm": 8.224148750305176, "learning_rate": 2.523275862068966e-07, "loss": 0.0987, "step": 285500 }, { "epoch": 2.81, "grad_norm": 2.9528727531433105, "learning_rate": 2.5189655172413795e-07, "loss": 0.1783, "step": 285525 }, { "epoch": 2.81, "grad_norm": 4.526755332946777, "learning_rate": 2.514655172413793e-07, "loss": 0.1087, "step": 285550 }, { "epoch": 2.81, "grad_norm": 1.5644475221633911, "learning_rate": 2.510344827586207e-07, "loss": 0.2038, "step": 285575 }, { "epoch": 2.81, "grad_norm": 13.028864860534668, "learning_rate": 2.506034482758621e-07, "loss": 0.0813, "step": 285600 }, { "epoch": 2.81, "grad_norm": 5.814693927764893, "learning_rate": 2.5017241379310346e-07, "loss": 0.1231, "step": 285625 }, { "epoch": 2.81, "grad_norm": 11.140887260437012, "learning_rate": 2.4974137931034486e-07, "loss": 0.081, "step": 285650 }, { "epoch": 2.81, "grad_norm": 4.288698673248291, "learning_rate": 2.493103448275862e-07, "loss": 0.1799, "step": 285675 }, { "epoch": 2.81, "grad_norm": 12.503189086914062, "learning_rate": 2.488793103448276e-07, "loss": 0.0833, "step": 285700 }, { "epoch": 2.81, "grad_norm": 6.914416313171387, "learning_rate": 2.4844827586206897e-07, "loss": 0.2165, "step": 285725 }, { "epoch": 2.81, "grad_norm": 11.284701347351074, "learning_rate": 2.4801724137931037e-07, "loss": 0.077, "step": 285750 }, { "epoch": 2.81, "grad_norm": 6.445131301879883, "learning_rate": 2.475862068965517e-07, "loss": 0.1564, "step": 285775 }, { "epoch": 2.81, "grad_norm": 10.330538749694824, "learning_rate": 2.471551724137931e-07, "loss": 0.0973, "step": 285800 }, { "epoch": 2.81, "grad_norm": 3.0130534172058105, "learning_rate": 2.4672413793103453e-07, "loss": 0.1903, "step": 285825 }, { "epoch": 2.81, "grad_norm": 14.449359893798828, "learning_rate": 2.462931034482759e-07, "loss": 0.1086, "step": 285850 }, { "epoch": 2.81, "grad_norm": 5.970054626464844, "learning_rate": 2.458620689655173e-07, "loss": 0.1552, "step": 285875 }, { "epoch": 2.81, "grad_norm": 2.387131929397583, "learning_rate": 2.4543103448275863e-07, "loss": 0.0739, "step": 285900 }, { "epoch": 2.81, "grad_norm": 2.035194158554077, "learning_rate": 2.4500000000000004e-07, "loss": 0.171, "step": 285925 }, { "epoch": 2.81, "grad_norm": 14.033767700195312, "learning_rate": 2.445689655172414e-07, "loss": 0.1044, "step": 285950 }, { "epoch": 2.81, "grad_norm": 1.5076621770858765, "learning_rate": 2.441379310344828e-07, "loss": 0.148, "step": 285975 }, { "epoch": 2.81, "grad_norm": 14.878557205200195, "learning_rate": 2.4370689655172414e-07, "loss": 0.1219, "step": 286000 }, { "epoch": 2.81, "grad_norm": 0.9518868327140808, "learning_rate": 2.4327586206896554e-07, "loss": 0.2244, "step": 286025 }, { "epoch": 2.81, "grad_norm": 9.495797157287598, "learning_rate": 2.428448275862069e-07, "loss": 0.0749, "step": 286050 }, { "epoch": 2.81, "grad_norm": 8.25293254852295, "learning_rate": 2.424137931034483e-07, "loss": 0.1944, "step": 286075 }, { "epoch": 2.81, "grad_norm": 11.943222045898438, "learning_rate": 2.419827586206897e-07, "loss": 0.0954, "step": 286100 }, { "epoch": 2.81, "grad_norm": 4.027529716491699, "learning_rate": 2.4155172413793105e-07, "loss": 0.1731, "step": 286125 }, { "epoch": 2.81, "grad_norm": 9.352141380310059, "learning_rate": 2.4112068965517246e-07, "loss": 0.0887, "step": 286150 }, { "epoch": 2.81, "grad_norm": 5.704948902130127, "learning_rate": 2.406896551724138e-07, "loss": 0.1575, "step": 286175 }, { "epoch": 2.81, "grad_norm": 15.8654203414917, "learning_rate": 2.4025862068965516e-07, "loss": 0.0798, "step": 286200 }, { "epoch": 2.81, "grad_norm": 3.273365020751953, "learning_rate": 2.3982758620689656e-07, "loss": 0.1811, "step": 286225 }, { "epoch": 2.81, "grad_norm": 12.153243064880371, "learning_rate": 2.3939655172413796e-07, "loss": 0.1005, "step": 286250 }, { "epoch": 2.81, "grad_norm": 1.221832036972046, "learning_rate": 2.389655172413793e-07, "loss": 0.1432, "step": 286275 }, { "epoch": 2.81, "grad_norm": 13.50602912902832, "learning_rate": 2.385344827586207e-07, "loss": 0.1116, "step": 286300 }, { "epoch": 2.82, "grad_norm": 1.0211669206619263, "learning_rate": 2.381034482758621e-07, "loss": 0.1345, "step": 286325 }, { "epoch": 2.82, "grad_norm": 13.787424087524414, "learning_rate": 2.3767241379310347e-07, "loss": 0.1067, "step": 286350 }, { "epoch": 2.82, "grad_norm": 0.2738341987133026, "learning_rate": 2.3724137931034482e-07, "loss": 0.1446, "step": 286375 }, { "epoch": 2.82, "grad_norm": 13.979629516601562, "learning_rate": 2.3681034482758623e-07, "loss": 0.1083, "step": 286400 }, { "epoch": 2.82, "grad_norm": 6.838694095611572, "learning_rate": 2.363793103448276e-07, "loss": 0.1793, "step": 286425 }, { "epoch": 2.82, "grad_norm": 8.525102615356445, "learning_rate": 2.35948275862069e-07, "loss": 0.1134, "step": 286450 }, { "epoch": 2.82, "grad_norm": 4.738741874694824, "learning_rate": 2.3551724137931036e-07, "loss": 0.1999, "step": 286475 }, { "epoch": 2.82, "grad_norm": 8.508076667785645, "learning_rate": 2.3508620689655174e-07, "loss": 0.0839, "step": 286500 }, { "epoch": 2.82, "grad_norm": 8.261890411376953, "learning_rate": 2.346551724137931e-07, "loss": 0.1713, "step": 286525 }, { "epoch": 2.82, "grad_norm": 15.016594886779785, "learning_rate": 2.342241379310345e-07, "loss": 0.0964, "step": 286550 }, { "epoch": 2.82, "grad_norm": 2.366218090057373, "learning_rate": 2.337931034482759e-07, "loss": 0.159, "step": 286575 }, { "epoch": 2.82, "grad_norm": 9.371865272521973, "learning_rate": 2.3336206896551724e-07, "loss": 0.1186, "step": 286600 }, { "epoch": 2.82, "grad_norm": 4.423198223114014, "learning_rate": 2.3293103448275865e-07, "loss": 0.1795, "step": 286625 }, { "epoch": 2.82, "grad_norm": 15.365581512451172, "learning_rate": 2.3250000000000002e-07, "loss": 0.0984, "step": 286650 }, { "epoch": 2.82, "grad_norm": 2.1956284046173096, "learning_rate": 2.3206896551724137e-07, "loss": 0.1632, "step": 286675 }, { "epoch": 2.82, "grad_norm": 12.765609741210938, "learning_rate": 2.3163793103448278e-07, "loss": 0.0935, "step": 286700 }, { "epoch": 2.82, "grad_norm": 4.21436071395874, "learning_rate": 2.3120689655172416e-07, "loss": 0.1622, "step": 286725 }, { "epoch": 2.82, "grad_norm": 15.33716106414795, "learning_rate": 2.3077586206896553e-07, "loss": 0.1318, "step": 286750 }, { "epoch": 2.82, "grad_norm": 2.0421719551086426, "learning_rate": 2.303448275862069e-07, "loss": 0.1731, "step": 286775 }, { "epoch": 2.82, "grad_norm": 13.526161193847656, "learning_rate": 2.299137931034483e-07, "loss": 0.1, "step": 286800 }, { "epoch": 2.82, "grad_norm": 7.362422466278076, "learning_rate": 2.2950000000000004e-07, "loss": 0.1917, "step": 286825 }, { "epoch": 2.82, "grad_norm": 15.053821563720703, "learning_rate": 2.290689655172414e-07, "loss": 0.0972, "step": 286850 }, { "epoch": 2.82, "grad_norm": 6.3614726066589355, "learning_rate": 2.2863793103448276e-07, "loss": 0.2016, "step": 286875 }, { "epoch": 2.82, "grad_norm": 12.436543464660645, "learning_rate": 2.2820689655172417e-07, "loss": 0.101, "step": 286900 }, { "epoch": 2.82, "grad_norm": 3.906540632247925, "learning_rate": 2.2777586206896552e-07, "loss": 0.1939, "step": 286925 }, { "epoch": 2.82, "grad_norm": 10.617142677307129, "learning_rate": 2.2734482758620692e-07, "loss": 0.0993, "step": 286950 }, { "epoch": 2.82, "grad_norm": 3.910372734069824, "learning_rate": 2.269137931034483e-07, "loss": 0.1499, "step": 286975 }, { "epoch": 2.82, "grad_norm": 12.566999435424805, "learning_rate": 2.2648275862068965e-07, "loss": 0.1031, "step": 287000 }, { "epoch": 2.82, "grad_norm": 0.6143273115158081, "learning_rate": 2.2605172413793105e-07, "loss": 0.1723, "step": 287025 }, { "epoch": 2.82, "grad_norm": 10.146441459655762, "learning_rate": 2.2562068965517243e-07, "loss": 0.1032, "step": 287050 }, { "epoch": 2.82, "grad_norm": 4.342233657836914, "learning_rate": 2.251896551724138e-07, "loss": 0.1692, "step": 287075 }, { "epoch": 2.82, "grad_norm": 8.266345024108887, "learning_rate": 2.2475862068965518e-07, "loss": 0.0835, "step": 287100 }, { "epoch": 2.82, "grad_norm": 5.184007167816162, "learning_rate": 2.243275862068966e-07, "loss": 0.176, "step": 287125 }, { "epoch": 2.82, "grad_norm": 9.724078178405762, "learning_rate": 2.2389655172413794e-07, "loss": 0.089, "step": 287150 }, { "epoch": 2.82, "grad_norm": 2.758232593536377, "learning_rate": 2.2346551724137932e-07, "loss": 0.1567, "step": 287175 }, { "epoch": 2.82, "grad_norm": 9.131028175354004, "learning_rate": 2.2303448275862072e-07, "loss": 0.0925, "step": 287200 }, { "epoch": 2.82, "grad_norm": 7.183652877807617, "learning_rate": 2.2260344827586207e-07, "loss": 0.1887, "step": 287225 }, { "epoch": 2.82, "grad_norm": 10.946891784667969, "learning_rate": 2.2217241379310347e-07, "loss": 0.1101, "step": 287250 }, { "epoch": 2.82, "grad_norm": 5.426390171051025, "learning_rate": 2.2174137931034485e-07, "loss": 0.1703, "step": 287275 }, { "epoch": 2.82, "grad_norm": 8.475959777832031, "learning_rate": 2.2131034482758623e-07, "loss": 0.0896, "step": 287300 }, { "epoch": 2.83, "grad_norm": 1.0378786325454712, "learning_rate": 2.208793103448276e-07, "loss": 0.1471, "step": 287325 }, { "epoch": 2.83, "grad_norm": 18.132017135620117, "learning_rate": 2.2044827586206898e-07, "loss": 0.0911, "step": 287350 }, { "epoch": 2.83, "grad_norm": 1.7724711894989014, "learning_rate": 2.2001724137931036e-07, "loss": 0.1583, "step": 287375 }, { "epoch": 2.83, "grad_norm": 8.125168800354004, "learning_rate": 2.1958620689655174e-07, "loss": 0.0872, "step": 287400 }, { "epoch": 2.83, "grad_norm": 3.7138454914093018, "learning_rate": 2.1915517241379314e-07, "loss": 0.1739, "step": 287425 }, { "epoch": 2.83, "grad_norm": 13.541650772094727, "learning_rate": 2.187241379310345e-07, "loss": 0.073, "step": 287450 }, { "epoch": 2.83, "grad_norm": 1.323655366897583, "learning_rate": 2.182931034482759e-07, "loss": 0.1977, "step": 287475 }, { "epoch": 2.83, "grad_norm": 17.844934463500977, "learning_rate": 2.1786206896551727e-07, "loss": 0.1225, "step": 287500 }, { "epoch": 2.83, "grad_norm": 4.820648670196533, "learning_rate": 2.1743103448275862e-07, "loss": 0.2004, "step": 287525 }, { "epoch": 2.83, "grad_norm": 10.53366470336914, "learning_rate": 2.1700000000000002e-07, "loss": 0.0832, "step": 287550 }, { "epoch": 2.83, "grad_norm": 6.386593341827393, "learning_rate": 2.1656896551724137e-07, "loss": 0.1791, "step": 287575 }, { "epoch": 2.83, "grad_norm": 5.2820820808410645, "learning_rate": 2.1613793103448278e-07, "loss": 0.0976, "step": 287600 }, { "epoch": 2.83, "grad_norm": 2.4430935382843018, "learning_rate": 2.1570689655172416e-07, "loss": 0.1474, "step": 287625 }, { "epoch": 2.83, "grad_norm": 11.93431282043457, "learning_rate": 2.1527586206896556e-07, "loss": 0.0957, "step": 287650 }, { "epoch": 2.83, "grad_norm": 4.018252372741699, "learning_rate": 2.148448275862069e-07, "loss": 0.1746, "step": 287675 }, { "epoch": 2.83, "grad_norm": 9.245079040527344, "learning_rate": 2.1441379310344829e-07, "loss": 0.0736, "step": 287700 }, { "epoch": 2.83, "grad_norm": 3.4127917289733887, "learning_rate": 2.139827586206897e-07, "loss": 0.1613, "step": 287725 }, { "epoch": 2.83, "grad_norm": 12.636598587036133, "learning_rate": 2.1355172413793104e-07, "loss": 0.1172, "step": 287750 }, { "epoch": 2.83, "grad_norm": 3.0505495071411133, "learning_rate": 2.1312068965517244e-07, "loss": 0.1476, "step": 287775 }, { "epoch": 2.83, "grad_norm": 12.16740608215332, "learning_rate": 2.126896551724138e-07, "loss": 0.1095, "step": 287800 }, { "epoch": 2.83, "grad_norm": 3.472775936126709, "learning_rate": 2.122586206896552e-07, "loss": 0.1813, "step": 287825 }, { "epoch": 2.83, "grad_norm": 12.900909423828125, "learning_rate": 2.1182758620689658e-07, "loss": 0.1005, "step": 287850 }, { "epoch": 2.83, "grad_norm": 4.327216625213623, "learning_rate": 2.1139655172413793e-07, "loss": 0.1462, "step": 287875 }, { "epoch": 2.83, "grad_norm": 9.176323890686035, "learning_rate": 2.1096551724137933e-07, "loss": 0.0928, "step": 287900 }, { "epoch": 2.83, "grad_norm": 3.8090646266937256, "learning_rate": 2.105344827586207e-07, "loss": 0.1847, "step": 287925 }, { "epoch": 2.83, "grad_norm": 4.251635551452637, "learning_rate": 2.101034482758621e-07, "loss": 0.0943, "step": 287950 }, { "epoch": 2.83, "grad_norm": 4.514775276184082, "learning_rate": 2.0967241379310346e-07, "loss": 0.1557, "step": 287975 }, { "epoch": 2.83, "grad_norm": 9.897181510925293, "learning_rate": 2.0924137931034484e-07, "loss": 0.0757, "step": 288000 }, { "epoch": 2.83, "grad_norm": 4.135583877563477, "learning_rate": 2.0881034482758621e-07, "loss": 0.1779, "step": 288025 }, { "epoch": 2.83, "grad_norm": 13.971656799316406, "learning_rate": 2.083793103448276e-07, "loss": 0.1017, "step": 288050 }, { "epoch": 2.83, "grad_norm": 5.301190376281738, "learning_rate": 2.07948275862069e-07, "loss": 0.1396, "step": 288075 }, { "epoch": 2.83, "grad_norm": 4.758100509643555, "learning_rate": 2.0751724137931035e-07, "loss": 0.0962, "step": 288100 }, { "epoch": 2.83, "grad_norm": 6.678051471710205, "learning_rate": 2.0708620689655175e-07, "loss": 0.1722, "step": 288125 }, { "epoch": 2.83, "grad_norm": 8.595075607299805, "learning_rate": 2.0665517241379313e-07, "loss": 0.1104, "step": 288150 }, { "epoch": 2.83, "grad_norm": 8.710545539855957, "learning_rate": 2.0622413793103448e-07, "loss": 0.1897, "step": 288175 }, { "epoch": 2.83, "grad_norm": 16.943939208984375, "learning_rate": 2.0579310344827588e-07, "loss": 0.0684, "step": 288200 }, { "epoch": 2.83, "grad_norm": 5.383914947509766, "learning_rate": 2.053793103448276e-07, "loss": 0.2052, "step": 288225 }, { "epoch": 2.83, "grad_norm": 13.545278549194336, "learning_rate": 2.0494827586206898e-07, "loss": 0.099, "step": 288250 }, { "epoch": 2.83, "grad_norm": 10.130949974060059, "learning_rate": 2.0451724137931038e-07, "loss": 0.1548, "step": 288275 }, { "epoch": 2.83, "grad_norm": 9.643112182617188, "learning_rate": 2.0408620689655174e-07, "loss": 0.0998, "step": 288300 }, { "epoch": 2.83, "grad_norm": 4.887570858001709, "learning_rate": 2.0365517241379314e-07, "loss": 0.1526, "step": 288325 }, { "epoch": 2.84, "grad_norm": 12.23403263092041, "learning_rate": 2.032241379310345e-07, "loss": 0.1289, "step": 288350 }, { "epoch": 2.84, "grad_norm": 4.16295051574707, "learning_rate": 2.0279310344827587e-07, "loss": 0.1482, "step": 288375 }, { "epoch": 2.84, "grad_norm": 9.50444221496582, "learning_rate": 2.0236206896551727e-07, "loss": 0.0985, "step": 288400 }, { "epoch": 2.84, "grad_norm": 4.4620137214660645, "learning_rate": 2.0193103448275862e-07, "loss": 0.1687, "step": 288425 }, { "epoch": 2.84, "grad_norm": 10.290575981140137, "learning_rate": 2.0150000000000002e-07, "loss": 0.1094, "step": 288450 }, { "epoch": 2.84, "grad_norm": 5.761204719543457, "learning_rate": 2.010689655172414e-07, "loss": 0.1727, "step": 288475 }, { "epoch": 2.84, "grad_norm": 10.118365287780762, "learning_rate": 2.0063793103448275e-07, "loss": 0.0799, "step": 288500 }, { "epoch": 2.84, "grad_norm": 4.402824401855469, "learning_rate": 2.0020689655172416e-07, "loss": 0.1669, "step": 288525 }, { "epoch": 2.84, "grad_norm": 17.3824520111084, "learning_rate": 1.9977586206896553e-07, "loss": 0.0966, "step": 288550 }, { "epoch": 2.84, "grad_norm": 3.7586874961853027, "learning_rate": 1.993448275862069e-07, "loss": 0.1599, "step": 288575 }, { "epoch": 2.84, "grad_norm": 6.590234756469727, "learning_rate": 1.9891379310344829e-07, "loss": 0.0893, "step": 288600 }, { "epoch": 2.84, "grad_norm": 4.995328903198242, "learning_rate": 1.984827586206897e-07, "loss": 0.1549, "step": 288625 }, { "epoch": 2.84, "grad_norm": 7.91560697555542, "learning_rate": 1.9805172413793104e-07, "loss": 0.0886, "step": 288650 }, { "epoch": 2.84, "grad_norm": 0.06123846024274826, "learning_rate": 1.9762068965517242e-07, "loss": 0.1878, "step": 288675 }, { "epoch": 2.84, "grad_norm": 13.276201248168945, "learning_rate": 1.9718965517241382e-07, "loss": 0.1251, "step": 288700 }, { "epoch": 2.84, "grad_norm": 0.02167515456676483, "learning_rate": 1.9675862068965517e-07, "loss": 0.2104, "step": 288725 }, { "epoch": 2.84, "grad_norm": 6.68047571182251, "learning_rate": 1.9632758620689658e-07, "loss": 0.0671, "step": 288750 }, { "epoch": 2.84, "grad_norm": 5.675440311431885, "learning_rate": 1.9589655172413795e-07, "loss": 0.1496, "step": 288775 }, { "epoch": 2.84, "grad_norm": 9.876206398010254, "learning_rate": 1.9546551724137933e-07, "loss": 0.0585, "step": 288800 }, { "epoch": 2.84, "grad_norm": 2.0345497131347656, "learning_rate": 1.950344827586207e-07, "loss": 0.151, "step": 288825 }, { "epoch": 2.84, "grad_norm": 11.023467063903809, "learning_rate": 1.9460344827586206e-07, "loss": 0.1227, "step": 288850 }, { "epoch": 2.84, "grad_norm": 7.1804986000061035, "learning_rate": 1.9417241379310346e-07, "loss": 0.1612, "step": 288875 }, { "epoch": 2.84, "grad_norm": 5.404306888580322, "learning_rate": 1.9374137931034484e-07, "loss": 0.09, "step": 288900 }, { "epoch": 2.84, "grad_norm": 3.265299081802368, "learning_rate": 1.9331034482758624e-07, "loss": 0.1706, "step": 288925 }, { "epoch": 2.84, "grad_norm": 11.720108032226562, "learning_rate": 1.928793103448276e-07, "loss": 0.1313, "step": 288950 }, { "epoch": 2.84, "grad_norm": 7.705924034118652, "learning_rate": 1.92448275862069e-07, "loss": 0.1773, "step": 288975 }, { "epoch": 2.84, "grad_norm": 12.530106544494629, "learning_rate": 1.9201724137931037e-07, "loss": 0.0937, "step": 289000 }, { "epoch": 2.84, "grad_norm": 4.209263324737549, "learning_rate": 1.9158620689655172e-07, "loss": 0.1219, "step": 289025 }, { "epoch": 2.84, "grad_norm": 7.584659099578857, "learning_rate": 1.9115517241379313e-07, "loss": 0.09, "step": 289050 }, { "epoch": 2.84, "grad_norm": 0.6194761991500854, "learning_rate": 1.9072413793103448e-07, "loss": 0.1929, "step": 289075 }, { "epoch": 2.84, "grad_norm": 11.188459396362305, "learning_rate": 1.9029310344827588e-07, "loss": 0.0937, "step": 289100 }, { "epoch": 2.84, "grad_norm": 5.1540141105651855, "learning_rate": 1.8986206896551726e-07, "loss": 0.2031, "step": 289125 }, { "epoch": 2.84, "grad_norm": 13.958105087280273, "learning_rate": 1.8943103448275866e-07, "loss": 0.0726, "step": 289150 }, { "epoch": 2.84, "grad_norm": 8.374736785888672, "learning_rate": 1.89e-07, "loss": 0.2024, "step": 289175 }, { "epoch": 2.84, "grad_norm": 9.688337326049805, "learning_rate": 1.885689655172414e-07, "loss": 0.097, "step": 289200 }, { "epoch": 2.84, "grad_norm": 4.945779323577881, "learning_rate": 1.881379310344828e-07, "loss": 0.1726, "step": 289225 }, { "epoch": 2.84, "grad_norm": 14.367459297180176, "learning_rate": 1.8770689655172414e-07, "loss": 0.0845, "step": 289250 }, { "epoch": 2.84, "grad_norm": 27.61911964416504, "learning_rate": 1.8727586206896555e-07, "loss": 0.2024, "step": 289275 }, { "epoch": 2.84, "grad_norm": 16.194700241088867, "learning_rate": 1.868448275862069e-07, "loss": 0.0731, "step": 289300 }, { "epoch": 2.84, "grad_norm": 2.897193431854248, "learning_rate": 1.864137931034483e-07, "loss": 0.1611, "step": 289325 }, { "epoch": 2.84, "grad_norm": 11.85734748840332, "learning_rate": 1.8598275862068968e-07, "loss": 0.0943, "step": 289350 }, { "epoch": 2.85, "grad_norm": 7.435826301574707, "learning_rate": 1.8555172413793103e-07, "loss": 0.1658, "step": 289375 }, { "epoch": 2.85, "grad_norm": 7.154844284057617, "learning_rate": 1.8512068965517243e-07, "loss": 0.0776, "step": 289400 }, { "epoch": 2.85, "grad_norm": 5.936498165130615, "learning_rate": 1.846896551724138e-07, "loss": 0.1963, "step": 289425 }, { "epoch": 2.85, "grad_norm": 11.470497131347656, "learning_rate": 1.842586206896552e-07, "loss": 0.1028, "step": 289450 }, { "epoch": 2.85, "grad_norm": 3.3636486530303955, "learning_rate": 1.8382758620689656e-07, "loss": 0.1613, "step": 289475 }, { "epoch": 2.85, "grad_norm": 8.819095611572266, "learning_rate": 1.8339655172413794e-07, "loss": 0.0946, "step": 289500 }, { "epoch": 2.85, "grad_norm": 5.226517200469971, "learning_rate": 1.8296551724137932e-07, "loss": 0.173, "step": 289525 }, { "epoch": 2.85, "grad_norm": 9.242074966430664, "learning_rate": 1.825344827586207e-07, "loss": 0.0954, "step": 289550 }, { "epoch": 2.85, "grad_norm": 7.248740196228027, "learning_rate": 1.821034482758621e-07, "loss": 0.1864, "step": 289575 }, { "epoch": 2.85, "grad_norm": 12.645318984985352, "learning_rate": 1.8167241379310345e-07, "loss": 0.081, "step": 289600 }, { "epoch": 2.85, "grad_norm": 2.2809441089630127, "learning_rate": 1.8124137931034485e-07, "loss": 0.1516, "step": 289625 }, { "epoch": 2.85, "grad_norm": 5.835988521575928, "learning_rate": 1.8081034482758623e-07, "loss": 0.0852, "step": 289650 }, { "epoch": 2.85, "grad_norm": 4.907946586608887, "learning_rate": 1.8037931034482758e-07, "loss": 0.1667, "step": 289675 }, { "epoch": 2.85, "grad_norm": 7.828611373901367, "learning_rate": 1.7994827586206898e-07, "loss": 0.0964, "step": 289700 }, { "epoch": 2.85, "grad_norm": 1.6908615827560425, "learning_rate": 1.7951724137931036e-07, "loss": 0.1935, "step": 289725 }, { "epoch": 2.85, "grad_norm": 16.51581573486328, "learning_rate": 1.7908620689655174e-07, "loss": 0.1097, "step": 289750 }, { "epoch": 2.85, "grad_norm": 4.521432876586914, "learning_rate": 1.7865517241379311e-07, "loss": 0.1877, "step": 289775 }, { "epoch": 2.85, "grad_norm": 5.990104675292969, "learning_rate": 1.7822413793103452e-07, "loss": 0.1028, "step": 289800 }, { "epoch": 2.85, "grad_norm": 3.3122313022613525, "learning_rate": 1.7779310344827587e-07, "loss": 0.1844, "step": 289825 }, { "epoch": 2.85, "grad_norm": 10.56454086303711, "learning_rate": 1.7736206896551724e-07, "loss": 0.0718, "step": 289850 }, { "epoch": 2.85, "grad_norm": 5.271860122680664, "learning_rate": 1.7693103448275865e-07, "loss": 0.1823, "step": 289875 }, { "epoch": 2.85, "grad_norm": 11.925877571105957, "learning_rate": 1.765e-07, "loss": 0.0774, "step": 289900 }, { "epoch": 2.85, "grad_norm": 3.771052837371826, "learning_rate": 1.760689655172414e-07, "loss": 0.1742, "step": 289925 }, { "epoch": 2.85, "grad_norm": 18.18981170654297, "learning_rate": 1.7563793103448278e-07, "loss": 0.1033, "step": 289950 }, { "epoch": 2.85, "grad_norm": 4.838429927825928, "learning_rate": 1.7520689655172416e-07, "loss": 0.1768, "step": 289975 }, { "epoch": 2.85, "grad_norm": 6.43843936920166, "learning_rate": 1.7477586206896553e-07, "loss": 0.1044, "step": 290000 }, { "epoch": 2.85, "grad_norm": 2.504453182220459, "learning_rate": 1.7434482758620688e-07, "loss": 0.1874, "step": 290025 }, { "epoch": 2.85, "grad_norm": 3.482499122619629, "learning_rate": 1.739137931034483e-07, "loss": 0.1032, "step": 290050 }, { "epoch": 2.85, "grad_norm": 3.0487558841705322, "learning_rate": 1.7348275862068966e-07, "loss": 0.1664, "step": 290075 }, { "epoch": 2.85, "grad_norm": 11.178104400634766, "learning_rate": 1.7305172413793107e-07, "loss": 0.0953, "step": 290100 }, { "epoch": 2.85, "grad_norm": 5.191163539886475, "learning_rate": 1.7262068965517242e-07, "loss": 0.189, "step": 290125 }, { "epoch": 2.85, "grad_norm": 12.850743293762207, "learning_rate": 1.7218965517241382e-07, "loss": 0.101, "step": 290150 }, { "epoch": 2.85, "grad_norm": 1.6874817609786987, "learning_rate": 1.717586206896552e-07, "loss": 0.1839, "step": 290175 }, { "epoch": 2.85, "grad_norm": 7.802754878997803, "learning_rate": 1.7132758620689655e-07, "loss": 0.1058, "step": 290200 }, { "epoch": 2.85, "grad_norm": 4.909899711608887, "learning_rate": 1.7089655172413795e-07, "loss": 0.1778, "step": 290225 }, { "epoch": 2.85, "grad_norm": 9.193936347961426, "learning_rate": 1.704655172413793e-07, "loss": 0.0814, "step": 290250 }, { "epoch": 2.85, "grad_norm": 3.2493104934692383, "learning_rate": 1.700344827586207e-07, "loss": 0.142, "step": 290275 }, { "epoch": 2.85, "grad_norm": 6.930803298950195, "learning_rate": 1.6960344827586208e-07, "loss": 0.1061, "step": 290300 }, { "epoch": 2.85, "grad_norm": 5.641629219055176, "learning_rate": 1.691724137931035e-07, "loss": 0.1218, "step": 290325 }, { "epoch": 2.85, "grad_norm": 11.235881805419922, "learning_rate": 1.6874137931034484e-07, "loss": 0.1366, "step": 290350 }, { "epoch": 2.86, "grad_norm": 3.3023293018341064, "learning_rate": 1.6831034482758622e-07, "loss": 0.1519, "step": 290375 }, { "epoch": 2.86, "grad_norm": 15.17092514038086, "learning_rate": 1.6787931034482762e-07, "loss": 0.1239, "step": 290400 }, { "epoch": 2.86, "grad_norm": 1.9078465700149536, "learning_rate": 1.6744827586206897e-07, "loss": 0.1696, "step": 290425 }, { "epoch": 2.86, "grad_norm": 15.324020385742188, "learning_rate": 1.6701724137931037e-07, "loss": 0.1079, "step": 290450 }, { "epoch": 2.86, "grad_norm": 3.6759445667266846, "learning_rate": 1.6658620689655172e-07, "loss": 0.1727, "step": 290475 }, { "epoch": 2.86, "grad_norm": 7.141728401184082, "learning_rate": 1.661551724137931e-07, "loss": 0.1059, "step": 290500 }, { "epoch": 2.86, "grad_norm": 1.6312075853347778, "learning_rate": 1.657241379310345e-07, "loss": 0.17, "step": 290525 }, { "epoch": 2.86, "grad_norm": 5.867542743682861, "learning_rate": 1.6529310344827586e-07, "loss": 0.0822, "step": 290550 }, { "epoch": 2.86, "grad_norm": 2.361449718475342, "learning_rate": 1.6486206896551726e-07, "loss": 0.1733, "step": 290575 }, { "epoch": 2.86, "grad_norm": 9.9949951171875, "learning_rate": 1.6443103448275864e-07, "loss": 0.1007, "step": 290600 }, { "epoch": 2.86, "grad_norm": 7.0733489990234375, "learning_rate": 1.6400000000000004e-07, "loss": 0.1688, "step": 290625 }, { "epoch": 2.86, "grad_norm": 8.30943775177002, "learning_rate": 1.635689655172414e-07, "loss": 0.0822, "step": 290650 }, { "epoch": 2.86, "grad_norm": 3.480247974395752, "learning_rate": 1.6313793103448277e-07, "loss": 0.1649, "step": 290675 }, { "epoch": 2.86, "grad_norm": 16.891315460205078, "learning_rate": 1.6270689655172414e-07, "loss": 0.1198, "step": 290700 }, { "epoch": 2.86, "grad_norm": 2.9211666584014893, "learning_rate": 1.6227586206896552e-07, "loss": 0.1765, "step": 290725 }, { "epoch": 2.86, "grad_norm": 9.144673347473145, "learning_rate": 1.6184482758620692e-07, "loss": 0.0994, "step": 290750 }, { "epoch": 2.86, "grad_norm": 13.375228881835938, "learning_rate": 1.6141379310344827e-07, "loss": 0.1628, "step": 290775 }, { "epoch": 2.86, "grad_norm": 10.579558372497559, "learning_rate": 1.6098275862068968e-07, "loss": 0.0835, "step": 290800 }, { "epoch": 2.86, "grad_norm": 3.9846456050872803, "learning_rate": 1.6055172413793106e-07, "loss": 0.2037, "step": 290825 }, { "epoch": 2.86, "grad_norm": 16.30888557434082, "learning_rate": 1.601206896551724e-07, "loss": 0.1169, "step": 290850 }, { "epoch": 2.86, "grad_norm": 6.040767192840576, "learning_rate": 1.596896551724138e-07, "loss": 0.1635, "step": 290875 }, { "epoch": 2.86, "grad_norm": 8.957514762878418, "learning_rate": 1.5925862068965519e-07, "loss": 0.0878, "step": 290900 }, { "epoch": 2.86, "grad_norm": 8.531933784484863, "learning_rate": 1.5882758620689656e-07, "loss": 0.1474, "step": 290925 }, { "epoch": 2.86, "grad_norm": 8.668181419372559, "learning_rate": 1.5839655172413794e-07, "loss": 0.0856, "step": 290950 }, { "epoch": 2.86, "grad_norm": 6.198309898376465, "learning_rate": 1.5796551724137934e-07, "loss": 0.1399, "step": 290975 }, { "epoch": 2.86, "grad_norm": 21.214040756225586, "learning_rate": 1.575344827586207e-07, "loss": 0.0907, "step": 291000 }, { "epoch": 2.86, "grad_norm": 2.825120449066162, "learning_rate": 1.5710344827586207e-07, "loss": 0.2074, "step": 291025 }, { "epoch": 2.86, "grad_norm": 10.453917503356934, "learning_rate": 1.5667241379310348e-07, "loss": 0.0879, "step": 291050 }, { "epoch": 2.86, "grad_norm": 1.3479783535003662, "learning_rate": 1.5624137931034483e-07, "loss": 0.1265, "step": 291075 }, { "epoch": 2.86, "grad_norm": 9.633252143859863, "learning_rate": 1.558103448275862e-07, "loss": 0.0718, "step": 291100 }, { "epoch": 2.86, "grad_norm": 4.7671098709106445, "learning_rate": 1.553793103448276e-07, "loss": 0.1873, "step": 291125 }, { "epoch": 2.86, "grad_norm": 13.516877174377441, "learning_rate": 1.5494827586206898e-07, "loss": 0.1104, "step": 291150 }, { "epoch": 2.86, "grad_norm": 0.5094437599182129, "learning_rate": 1.5451724137931036e-07, "loss": 0.1932, "step": 291175 }, { "epoch": 2.86, "grad_norm": 8.797487258911133, "learning_rate": 1.5408620689655174e-07, "loss": 0.0832, "step": 291200 }, { "epoch": 2.86, "grad_norm": 4.55523157119751, "learning_rate": 1.5365517241379311e-07, "loss": 0.1469, "step": 291225 }, { "epoch": 2.86, "grad_norm": 8.338932037353516, "learning_rate": 1.532241379310345e-07, "loss": 0.0831, "step": 291250 }, { "epoch": 2.86, "grad_norm": 6.122743606567383, "learning_rate": 1.5279310344827587e-07, "loss": 0.1544, "step": 291275 }, { "epoch": 2.86, "grad_norm": 13.648015022277832, "learning_rate": 1.5236206896551725e-07, "loss": 0.1014, "step": 291300 }, { "epoch": 2.86, "grad_norm": 1.1495475769042969, "learning_rate": 1.5193103448275862e-07, "loss": 0.1548, "step": 291325 }, { "epoch": 2.86, "grad_norm": 13.204207420349121, "learning_rate": 1.5150000000000003e-07, "loss": 0.0922, "step": 291350 }, { "epoch": 2.86, "grad_norm": 6.436310768127441, "learning_rate": 1.510689655172414e-07, "loss": 0.1972, "step": 291375 }, { "epoch": 2.87, "grad_norm": 8.88524341583252, "learning_rate": 1.5063793103448278e-07, "loss": 0.1051, "step": 291400 }, { "epoch": 2.87, "grad_norm": 4.517423629760742, "learning_rate": 1.5020689655172413e-07, "loss": 0.1953, "step": 291425 }, { "epoch": 2.87, "grad_norm": 14.911441802978516, "learning_rate": 1.4977586206896553e-07, "loss": 0.1167, "step": 291450 }, { "epoch": 2.87, "grad_norm": 2.551131248474121, "learning_rate": 1.493448275862069e-07, "loss": 0.1726, "step": 291475 }, { "epoch": 2.87, "grad_norm": 10.723548889160156, "learning_rate": 1.489137931034483e-07, "loss": 0.1032, "step": 291500 }, { "epoch": 2.87, "grad_norm": 5.760539531707764, "learning_rate": 1.4848275862068967e-07, "loss": 0.175, "step": 291525 }, { "epoch": 2.87, "grad_norm": 7.802069664001465, "learning_rate": 1.4805172413793104e-07, "loss": 0.0927, "step": 291550 }, { "epoch": 2.87, "grad_norm": 4.84967565536499, "learning_rate": 1.4762068965517242e-07, "loss": 0.1489, "step": 291575 }, { "epoch": 2.87, "grad_norm": 16.038389205932617, "learning_rate": 1.471896551724138e-07, "loss": 0.0907, "step": 291600 }, { "epoch": 2.87, "grad_norm": 6.038851737976074, "learning_rate": 1.4675862068965517e-07, "loss": 0.1651, "step": 291625 }, { "epoch": 2.87, "grad_norm": 12.215654373168945, "learning_rate": 1.4632758620689655e-07, "loss": 0.0821, "step": 291650 }, { "epoch": 2.87, "grad_norm": 3.0018832683563232, "learning_rate": 1.4589655172413795e-07, "loss": 0.1646, "step": 291675 }, { "epoch": 2.87, "grad_norm": 10.21243667602539, "learning_rate": 1.4546551724137933e-07, "loss": 0.1063, "step": 291700 }, { "epoch": 2.87, "grad_norm": 10.217367172241211, "learning_rate": 1.450344827586207e-07, "loss": 0.1599, "step": 291725 }, { "epoch": 2.87, "grad_norm": 8.423065185546875, "learning_rate": 1.4460344827586209e-07, "loss": 0.1181, "step": 291750 }, { "epoch": 2.87, "grad_norm": 4.951760768890381, "learning_rate": 1.4417241379310346e-07, "loss": 0.1693, "step": 291775 }, { "epoch": 2.87, "grad_norm": 12.897109985351562, "learning_rate": 1.4374137931034484e-07, "loss": 0.0952, "step": 291800 }, { "epoch": 2.87, "grad_norm": 5.068519592285156, "learning_rate": 1.4331034482758622e-07, "loss": 0.1859, "step": 291825 }, { "epoch": 2.87, "grad_norm": 13.19727897644043, "learning_rate": 1.428793103448276e-07, "loss": 0.0925, "step": 291850 }, { "epoch": 2.87, "grad_norm": 2.657188653945923, "learning_rate": 1.4244827586206897e-07, "loss": 0.1889, "step": 291875 }, { "epoch": 2.87, "grad_norm": 15.454710960388184, "learning_rate": 1.4201724137931037e-07, "loss": 0.0926, "step": 291900 }, { "epoch": 2.87, "grad_norm": 3.237380027770996, "learning_rate": 1.4158620689655173e-07, "loss": 0.163, "step": 291925 }, { "epoch": 2.87, "grad_norm": 11.765766143798828, "learning_rate": 1.411551724137931e-07, "loss": 0.1231, "step": 291950 }, { "epoch": 2.87, "grad_norm": 2.2179958820343018, "learning_rate": 1.407241379310345e-07, "loss": 0.1595, "step": 291975 }, { "epoch": 2.87, "grad_norm": 7.9251861572265625, "learning_rate": 1.4029310344827588e-07, "loss": 0.1129, "step": 292000 }, { "epoch": 2.87, "grad_norm": 3.1865875720977783, "learning_rate": 1.3986206896551726e-07, "loss": 0.2028, "step": 292025 }, { "epoch": 2.87, "grad_norm": 8.700489044189453, "learning_rate": 1.3943103448275864e-07, "loss": 0.1118, "step": 292050 }, { "epoch": 2.87, "grad_norm": 3.671264886856079, "learning_rate": 1.3900000000000001e-07, "loss": 0.2183, "step": 292075 }, { "epoch": 2.87, "grad_norm": 10.737680435180664, "learning_rate": 1.385689655172414e-07, "loss": 0.099, "step": 292100 }, { "epoch": 2.87, "grad_norm": 5.566004276275635, "learning_rate": 1.3813793103448277e-07, "loss": 0.1554, "step": 292125 }, { "epoch": 2.87, "grad_norm": 11.367910385131836, "learning_rate": 1.3770689655172414e-07, "loss": 0.0953, "step": 292150 }, { "epoch": 2.87, "grad_norm": 5.976807117462158, "learning_rate": 1.3727586206896552e-07, "loss": 0.1563, "step": 292175 }, { "epoch": 2.87, "grad_norm": 13.735427856445312, "learning_rate": 1.3684482758620693e-07, "loss": 0.1028, "step": 292200 }, { "epoch": 2.87, "grad_norm": 4.296465873718262, "learning_rate": 1.364137931034483e-07, "loss": 0.1392, "step": 292225 }, { "epoch": 2.87, "grad_norm": 11.464859962463379, "learning_rate": 1.3598275862068965e-07, "loss": 0.106, "step": 292250 }, { "epoch": 2.87, "grad_norm": 2.05802059173584, "learning_rate": 1.3555172413793103e-07, "loss": 0.1373, "step": 292275 }, { "epoch": 2.87, "grad_norm": 3.5987133979797363, "learning_rate": 1.3512068965517243e-07, "loss": 0.0907, "step": 292300 }, { "epoch": 2.87, "grad_norm": 3.335247755050659, "learning_rate": 1.346896551724138e-07, "loss": 0.1569, "step": 292325 }, { "epoch": 2.87, "grad_norm": 14.044990539550781, "learning_rate": 1.342586206896552e-07, "loss": 0.1234, "step": 292350 }, { "epoch": 2.87, "grad_norm": 4.136536121368408, "learning_rate": 1.3382758620689656e-07, "loss": 0.1629, "step": 292375 }, { "epoch": 2.87, "grad_norm": 12.112540245056152, "learning_rate": 1.3339655172413794e-07, "loss": 0.0882, "step": 292400 }, { "epoch": 2.88, "grad_norm": 3.7244086265563965, "learning_rate": 1.3296551724137932e-07, "loss": 0.1758, "step": 292425 }, { "epoch": 2.88, "grad_norm": 13.63486099243164, "learning_rate": 1.325344827586207e-07, "loss": 0.0879, "step": 292450 }, { "epoch": 2.88, "grad_norm": 1.26143217086792, "learning_rate": 1.3212068965517242e-07, "loss": 0.1539, "step": 292475 }, { "epoch": 2.88, "grad_norm": 5.534912109375, "learning_rate": 1.316896551724138e-07, "loss": 0.1001, "step": 292500 }, { "epoch": 2.88, "grad_norm": 4.918442726135254, "learning_rate": 1.3125862068965517e-07, "loss": 0.1822, "step": 292525 }, { "epoch": 2.88, "grad_norm": 11.657575607299805, "learning_rate": 1.3082758620689658e-07, "loss": 0.0792, "step": 292550 }, { "epoch": 2.88, "grad_norm": 4.841769695281982, "learning_rate": 1.3039655172413793e-07, "loss": 0.1787, "step": 292575 }, { "epoch": 2.88, "grad_norm": 8.890098571777344, "learning_rate": 1.299655172413793e-07, "loss": 0.0854, "step": 292600 }, { "epoch": 2.88, "grad_norm": 4.715978622436523, "learning_rate": 1.295344827586207e-07, "loss": 0.2155, "step": 292625 }, { "epoch": 2.88, "grad_norm": 11.815450668334961, "learning_rate": 1.2910344827586209e-07, "loss": 0.0859, "step": 292650 }, { "epoch": 2.88, "grad_norm": 3.2737488746643066, "learning_rate": 1.2867241379310346e-07, "loss": 0.1893, "step": 292675 }, { "epoch": 2.88, "grad_norm": 12.805413246154785, "learning_rate": 1.2824137931034484e-07, "loss": 0.0991, "step": 292700 }, { "epoch": 2.88, "grad_norm": 5.790901184082031, "learning_rate": 1.2781034482758622e-07, "loss": 0.1687, "step": 292725 }, { "epoch": 2.88, "grad_norm": 13.236970901489258, "learning_rate": 1.273793103448276e-07, "loss": 0.1033, "step": 292750 }, { "epoch": 2.88, "grad_norm": 4.217457294464111, "learning_rate": 1.2694827586206897e-07, "loss": 0.1231, "step": 292775 }, { "epoch": 2.88, "grad_norm": 8.006176948547363, "learning_rate": 1.2651724137931035e-07, "loss": 0.0765, "step": 292800 }, { "epoch": 2.88, "grad_norm": 5.062435626983643, "learning_rate": 1.2608620689655173e-07, "loss": 0.1844, "step": 292825 }, { "epoch": 2.88, "grad_norm": 12.572607040405273, "learning_rate": 1.2565517241379313e-07, "loss": 0.1111, "step": 292850 }, { "epoch": 2.88, "grad_norm": 3.5951781272888184, "learning_rate": 1.252241379310345e-07, "loss": 0.1745, "step": 292875 }, { "epoch": 2.88, "grad_norm": 9.041309356689453, "learning_rate": 1.2479310344827588e-07, "loss": 0.0905, "step": 292900 }, { "epoch": 2.88, "grad_norm": 2.950477123260498, "learning_rate": 1.2436206896551723e-07, "loss": 0.1692, "step": 292925 }, { "epoch": 2.88, "grad_norm": 11.708538055419922, "learning_rate": 1.2393103448275864e-07, "loss": 0.0676, "step": 292950 }, { "epoch": 2.88, "grad_norm": 6.984528541564941, "learning_rate": 1.2350000000000001e-07, "loss": 0.1513, "step": 292975 }, { "epoch": 2.88, "grad_norm": 6.188416481018066, "learning_rate": 1.230689655172414e-07, "loss": 0.0888, "step": 293000 }, { "epoch": 2.88, "grad_norm": 3.3503623008728027, "learning_rate": 1.2263793103448277e-07, "loss": 0.1656, "step": 293025 }, { "epoch": 2.88, "grad_norm": 14.807955741882324, "learning_rate": 1.2220689655172415e-07, "loss": 0.1067, "step": 293050 }, { "epoch": 2.88, "grad_norm": 3.5768308639526367, "learning_rate": 1.2177586206896552e-07, "loss": 0.1523, "step": 293075 }, { "epoch": 2.88, "grad_norm": 23.9953556060791, "learning_rate": 1.213448275862069e-07, "loss": 0.1011, "step": 293100 }, { "epoch": 2.88, "grad_norm": 3.410200357437134, "learning_rate": 1.2091379310344828e-07, "loss": 0.1418, "step": 293125 }, { "epoch": 2.88, "grad_norm": 14.895130157470703, "learning_rate": 1.2048275862068965e-07, "loss": 0.0956, "step": 293150 }, { "epoch": 2.88, "grad_norm": 2.940502643585205, "learning_rate": 1.2005172413793106e-07, "loss": 0.1643, "step": 293175 }, { "epoch": 2.88, "grad_norm": 4.643350601196289, "learning_rate": 1.1962068965517243e-07, "loss": 0.0861, "step": 293200 }, { "epoch": 2.88, "grad_norm": 0.7877209186553955, "learning_rate": 1.1918965517241381e-07, "loss": 0.144, "step": 293225 }, { "epoch": 2.88, "grad_norm": 22.34078025817871, "learning_rate": 1.1875862068965517e-07, "loss": 0.1226, "step": 293250 }, { "epoch": 2.88, "grad_norm": 7.803562164306641, "learning_rate": 1.1832758620689655e-07, "loss": 0.1894, "step": 293275 }, { "epoch": 2.88, "grad_norm": 9.28056812286377, "learning_rate": 1.1789655172413794e-07, "loss": 0.0757, "step": 293300 }, { "epoch": 2.88, "grad_norm": 3.911951780319214, "learning_rate": 1.1746551724137932e-07, "loss": 0.1785, "step": 293325 }, { "epoch": 2.88, "grad_norm": 14.867050170898438, "learning_rate": 1.170344827586207e-07, "loss": 0.0947, "step": 293350 }, { "epoch": 2.88, "grad_norm": 5.630527973175049, "learning_rate": 1.1660344827586209e-07, "loss": 0.1785, "step": 293375 }, { "epoch": 2.88, "grad_norm": 10.420001029968262, "learning_rate": 1.1617241379310346e-07, "loss": 0.0979, "step": 293400 }, { "epoch": 2.89, "grad_norm": 4.4834747314453125, "learning_rate": 1.1574137931034483e-07, "loss": 0.1658, "step": 293425 }, { "epoch": 2.89, "grad_norm": 7.9502668380737305, "learning_rate": 1.1531034482758622e-07, "loss": 0.0947, "step": 293450 }, { "epoch": 2.89, "grad_norm": 0.19125023484230042, "learning_rate": 1.148793103448276e-07, "loss": 0.1731, "step": 293475 }, { "epoch": 2.89, "grad_norm": 8.377144813537598, "learning_rate": 1.1444827586206897e-07, "loss": 0.0796, "step": 293500 }, { "epoch": 2.89, "grad_norm": 6.20858907699585, "learning_rate": 1.1401724137931036e-07, "loss": 0.173, "step": 293525 }, { "epoch": 2.89, "grad_norm": 13.790017127990723, "learning_rate": 1.1358620689655174e-07, "loss": 0.106, "step": 293550 }, { "epoch": 2.89, "grad_norm": 3.9681153297424316, "learning_rate": 1.131551724137931e-07, "loss": 0.1578, "step": 293575 }, { "epoch": 2.89, "grad_norm": 10.62621784210205, "learning_rate": 1.1272413793103448e-07, "loss": 0.1091, "step": 293600 }, { "epoch": 2.89, "grad_norm": 2.9053547382354736, "learning_rate": 1.1229310344827587e-07, "loss": 0.1832, "step": 293625 }, { "epoch": 2.89, "grad_norm": 10.420647621154785, "learning_rate": 1.1186206896551725e-07, "loss": 0.0815, "step": 293650 }, { "epoch": 2.89, "grad_norm": 4.159339904785156, "learning_rate": 1.1143103448275864e-07, "loss": 0.1741, "step": 293675 }, { "epoch": 2.89, "grad_norm": 11.616243362426758, "learning_rate": 1.1100000000000001e-07, "loss": 0.1025, "step": 293700 }, { "epoch": 2.89, "grad_norm": 3.7008209228515625, "learning_rate": 1.1056896551724139e-07, "loss": 0.233, "step": 293725 }, { "epoch": 2.89, "grad_norm": 15.65156078338623, "learning_rate": 1.1013793103448276e-07, "loss": 0.1115, "step": 293750 }, { "epoch": 2.89, "grad_norm": 4.240601539611816, "learning_rate": 1.0970689655172415e-07, "loss": 0.1672, "step": 293775 }, { "epoch": 2.89, "grad_norm": 9.543466567993164, "learning_rate": 1.0927586206896552e-07, "loss": 0.1045, "step": 293800 }, { "epoch": 2.89, "grad_norm": 1.391817331314087, "learning_rate": 1.088448275862069e-07, "loss": 0.14, "step": 293825 }, { "epoch": 2.89, "grad_norm": 5.1412763595581055, "learning_rate": 1.0841379310344829e-07, "loss": 0.0812, "step": 293850 }, { "epoch": 2.89, "grad_norm": 5.674554824829102, "learning_rate": 1.0798275862068967e-07, "loss": 0.1656, "step": 293875 }, { "epoch": 2.89, "grad_norm": 12.411210060119629, "learning_rate": 1.0755172413793106e-07, "loss": 0.1128, "step": 293900 }, { "epoch": 2.89, "grad_norm": 5.184309482574463, "learning_rate": 1.0712068965517242e-07, "loss": 0.1781, "step": 293925 }, { "epoch": 2.89, "grad_norm": 14.860516548156738, "learning_rate": 1.066896551724138e-07, "loss": 0.0705, "step": 293950 }, { "epoch": 2.89, "grad_norm": 5.535465240478516, "learning_rate": 1.0625862068965518e-07, "loss": 0.1786, "step": 293975 }, { "epoch": 2.89, "grad_norm": 11.890673637390137, "learning_rate": 1.0582758620689657e-07, "loss": 0.1004, "step": 294000 }, { "epoch": 2.89, "grad_norm": 1.5670616626739502, "learning_rate": 1.0539655172413794e-07, "loss": 0.1338, "step": 294025 }, { "epoch": 2.89, "grad_norm": 9.86021614074707, "learning_rate": 1.0496551724137932e-07, "loss": 0.0945, "step": 294050 }, { "epoch": 2.89, "grad_norm": 3.747861385345459, "learning_rate": 1.045344827586207e-07, "loss": 0.1713, "step": 294075 }, { "epoch": 2.89, "grad_norm": 14.036945343017578, "learning_rate": 1.0410344827586207e-07, "loss": 0.0982, "step": 294100 }, { "epoch": 2.89, "grad_norm": 4.1942057609558105, "learning_rate": 1.0367241379310345e-07, "loss": 0.1765, "step": 294125 }, { "epoch": 2.89, "grad_norm": 12.191746711730957, "learning_rate": 1.0324137931034484e-07, "loss": 0.0781, "step": 294150 }, { "epoch": 2.89, "grad_norm": 5.646463394165039, "learning_rate": 1.0281034482758622e-07, "loss": 0.1695, "step": 294175 }, { "epoch": 2.89, "grad_norm": 6.577790260314941, "learning_rate": 1.023793103448276e-07, "loss": 0.0903, "step": 294200 }, { "epoch": 2.89, "grad_norm": 6.361475944519043, "learning_rate": 1.0194827586206899e-07, "loss": 0.1633, "step": 294225 }, { "epoch": 2.89, "grad_norm": 9.930535316467285, "learning_rate": 1.0151724137931035e-07, "loss": 0.0765, "step": 294250 }, { "epoch": 2.89, "grad_norm": 3.9213521480560303, "learning_rate": 1.0108620689655173e-07, "loss": 0.1249, "step": 294275 }, { "epoch": 2.89, "grad_norm": 11.828168869018555, "learning_rate": 1.006551724137931e-07, "loss": 0.0821, "step": 294300 }, { "epoch": 2.89, "grad_norm": 6.056548118591309, "learning_rate": 1.002241379310345e-07, "loss": 0.2021, "step": 294325 }, { "epoch": 2.89, "grad_norm": 13.090659141540527, "learning_rate": 9.979310344827587e-08, "loss": 0.0959, "step": 294350 }, { "epoch": 2.89, "grad_norm": 4.575307846069336, "learning_rate": 9.936206896551726e-08, "loss": 0.1844, "step": 294375 }, { "epoch": 2.89, "grad_norm": 10.389701843261719, "learning_rate": 9.893103448275864e-08, "loss": 0.0994, "step": 294400 }, { "epoch": 2.89, "grad_norm": 5.345357894897461, "learning_rate": 9.85e-08, "loss": 0.1825, "step": 294425 }, { "epoch": 2.9, "grad_norm": 9.522100448608398, "learning_rate": 9.806896551724138e-08, "loss": 0.094, "step": 294450 }, { "epoch": 2.9, "grad_norm": 2.73942494392395, "learning_rate": 9.765517241379312e-08, "loss": 0.1738, "step": 294475 }, { "epoch": 2.9, "grad_norm": 15.759883880615234, "learning_rate": 9.722413793103449e-08, "loss": 0.1243, "step": 294500 }, { "epoch": 2.9, "grad_norm": 2.9436581134796143, "learning_rate": 9.679310344827587e-08, "loss": 0.1612, "step": 294525 }, { "epoch": 2.9, "grad_norm": 8.336920738220215, "learning_rate": 9.636206896551726e-08, "loss": 0.0761, "step": 294550 }, { "epoch": 2.9, "grad_norm": 0.687181293964386, "learning_rate": 9.593103448275862e-08, "loss": 0.1769, "step": 294575 }, { "epoch": 2.9, "grad_norm": 8.226532936096191, "learning_rate": 9.55e-08, "loss": 0.1168, "step": 294600 }, { "epoch": 2.9, "grad_norm": 3.852590560913086, "learning_rate": 9.506896551724138e-08, "loss": 0.1616, "step": 294625 }, { "epoch": 2.9, "grad_norm": 9.938506126403809, "learning_rate": 9.463793103448277e-08, "loss": 0.101, "step": 294650 }, { "epoch": 2.9, "grad_norm": 3.1741106510162354, "learning_rate": 9.420689655172415e-08, "loss": 0.1636, "step": 294675 }, { "epoch": 2.9, "grad_norm": 6.337513446807861, "learning_rate": 9.377586206896554e-08, "loss": 0.0841, "step": 294700 }, { "epoch": 2.9, "grad_norm": 3.4907376766204834, "learning_rate": 9.334482758620691e-08, "loss": 0.1503, "step": 294725 }, { "epoch": 2.9, "grad_norm": 10.667658805847168, "learning_rate": 9.291379310344828e-08, "loss": 0.1062, "step": 294750 }, { "epoch": 2.9, "grad_norm": 0.7461663484573364, "learning_rate": 9.248275862068965e-08, "loss": 0.1372, "step": 294775 }, { "epoch": 2.9, "grad_norm": 9.168915748596191, "learning_rate": 9.205172413793104e-08, "loss": 0.0948, "step": 294800 }, { "epoch": 2.9, "grad_norm": 0.031603600829839706, "learning_rate": 9.162068965517242e-08, "loss": 0.175, "step": 294825 }, { "epoch": 2.9, "grad_norm": 14.06960678100586, "learning_rate": 9.11896551724138e-08, "loss": 0.1013, "step": 294850 }, { "epoch": 2.9, "grad_norm": 6.23288631439209, "learning_rate": 9.075862068965519e-08, "loss": 0.1934, "step": 294875 }, { "epoch": 2.9, "grad_norm": 14.718740463256836, "learning_rate": 9.032758620689657e-08, "loss": 0.0993, "step": 294900 }, { "epoch": 2.9, "grad_norm": 6.424144268035889, "learning_rate": 8.989655172413793e-08, "loss": 0.2031, "step": 294925 }, { "epoch": 2.9, "grad_norm": 13.281496047973633, "learning_rate": 8.946551724137932e-08, "loss": 0.0773, "step": 294950 }, { "epoch": 2.9, "grad_norm": 4.070359706878662, "learning_rate": 8.90344827586207e-08, "loss": 0.1926, "step": 294975 }, { "epoch": 2.9, "grad_norm": 11.857867240905762, "learning_rate": 8.860344827586207e-08, "loss": 0.089, "step": 295000 }, { "epoch": 2.9, "grad_norm": 1.3590091466903687, "learning_rate": 8.817241379310346e-08, "loss": 0.1481, "step": 295025 }, { "epoch": 2.9, "grad_norm": 15.31335163116455, "learning_rate": 8.774137931034484e-08, "loss": 0.1208, "step": 295050 }, { "epoch": 2.9, "grad_norm": 2.2669222354888916, "learning_rate": 8.73103448275862e-08, "loss": 0.1365, "step": 295075 }, { "epoch": 2.9, "grad_norm": 9.272025108337402, "learning_rate": 8.687931034482758e-08, "loss": 0.0702, "step": 295100 }, { "epoch": 2.9, "grad_norm": 10.076847076416016, "learning_rate": 8.644827586206897e-08, "loss": 0.2019, "step": 295125 }, { "epoch": 2.9, "grad_norm": 12.623489379882812, "learning_rate": 8.601724137931035e-08, "loss": 0.0922, "step": 295150 }, { "epoch": 2.9, "grad_norm": 8.397134780883789, "learning_rate": 8.558620689655174e-08, "loss": 0.1752, "step": 295175 }, { "epoch": 2.9, "grad_norm": 10.004561424255371, "learning_rate": 8.515517241379312e-08, "loss": 0.0891, "step": 295200 }, { "epoch": 2.9, "grad_norm": 3.452083110809326, "learning_rate": 8.47241379310345e-08, "loss": 0.1553, "step": 295225 }, { "epoch": 2.9, "grad_norm": 13.07673168182373, "learning_rate": 8.429310344827586e-08, "loss": 0.0961, "step": 295250 }, { "epoch": 2.9, "grad_norm": 2.87290620803833, "learning_rate": 8.386206896551725e-08, "loss": 0.139, "step": 295275 }, { "epoch": 2.9, "grad_norm": 7.9403157234191895, "learning_rate": 8.343103448275863e-08, "loss": 0.1089, "step": 295300 }, { "epoch": 2.9, "grad_norm": 2.091261863708496, "learning_rate": 8.3e-08, "loss": 0.1959, "step": 295325 }, { "epoch": 2.9, "grad_norm": 11.364568710327148, "learning_rate": 8.256896551724139e-08, "loss": 0.1016, "step": 295350 }, { "epoch": 2.9, "grad_norm": 6.163838863372803, "learning_rate": 8.213793103448277e-08, "loss": 0.1842, "step": 295375 }, { "epoch": 2.9, "grad_norm": 12.941316604614258, "learning_rate": 8.170689655172413e-08, "loss": 0.0906, "step": 295400 }, { "epoch": 2.9, "grad_norm": 6.691166400909424, "learning_rate": 8.127586206896552e-08, "loss": 0.1476, "step": 295425 }, { "epoch": 2.9, "grad_norm": 11.656091690063477, "learning_rate": 8.08448275862069e-08, "loss": 0.1033, "step": 295450 }, { "epoch": 2.91, "grad_norm": 6.019611835479736, "learning_rate": 8.041379310344828e-08, "loss": 0.1942, "step": 295475 }, { "epoch": 2.91, "grad_norm": 6.926949501037598, "learning_rate": 7.998275862068967e-08, "loss": 0.1015, "step": 295500 }, { "epoch": 2.91, "grad_norm": 1.2333731651306152, "learning_rate": 7.955172413793104e-08, "loss": 0.2063, "step": 295525 }, { "epoch": 2.91, "grad_norm": 11.580008506774902, "learning_rate": 7.912068965517242e-08, "loss": 0.0914, "step": 295550 }, { "epoch": 2.91, "grad_norm": 1.9883722066879272, "learning_rate": 7.868965517241379e-08, "loss": 0.2152, "step": 295575 }, { "epoch": 2.91, "grad_norm": 15.073786735534668, "learning_rate": 7.825862068965518e-08, "loss": 0.0871, "step": 295600 }, { "epoch": 2.91, "grad_norm": 3.720979690551758, "learning_rate": 7.782758620689655e-08, "loss": 0.1703, "step": 295625 }, { "epoch": 2.91, "grad_norm": 12.683637619018555, "learning_rate": 7.739655172413794e-08, "loss": 0.1046, "step": 295650 }, { "epoch": 2.91, "grad_norm": 3.777600049972534, "learning_rate": 7.696551724137932e-08, "loss": 0.1848, "step": 295675 }, { "epoch": 2.91, "grad_norm": 9.028441429138184, "learning_rate": 7.65344827586207e-08, "loss": 0.086, "step": 295700 }, { "epoch": 2.91, "grad_norm": 5.970548152923584, "learning_rate": 7.610344827586207e-08, "loss": 0.1745, "step": 295725 }, { "epoch": 2.91, "grad_norm": 8.89266586303711, "learning_rate": 7.567241379310345e-08, "loss": 0.0683, "step": 295750 }, { "epoch": 2.91, "grad_norm": 3.862549066543579, "learning_rate": 7.524137931034483e-08, "loss": 0.1721, "step": 295775 }, { "epoch": 2.91, "grad_norm": 9.873758316040039, "learning_rate": 7.48103448275862e-08, "loss": 0.0988, "step": 295800 }, { "epoch": 2.91, "grad_norm": 5.837016582489014, "learning_rate": 7.43793103448276e-08, "loss": 0.1554, "step": 295825 }, { "epoch": 2.91, "grad_norm": 10.48218059539795, "learning_rate": 7.394827586206897e-08, "loss": 0.0802, "step": 295850 }, { "epoch": 2.91, "grad_norm": 1.7585521936416626, "learning_rate": 7.351724137931035e-08, "loss": 0.1693, "step": 295875 }, { "epoch": 2.91, "grad_norm": 11.143449783325195, "learning_rate": 7.308620689655173e-08, "loss": 0.1012, "step": 295900 }, { "epoch": 2.91, "grad_norm": 4.4355974197387695, "learning_rate": 7.265517241379312e-08, "loss": 0.1996, "step": 295925 }, { "epoch": 2.91, "grad_norm": 4.928380966186523, "learning_rate": 7.222413793103448e-08, "loss": 0.105, "step": 295950 }, { "epoch": 2.91, "grad_norm": 5.88162899017334, "learning_rate": 7.179310344827587e-08, "loss": 0.1473, "step": 295975 }, { "epoch": 2.91, "grad_norm": 6.338206768035889, "learning_rate": 7.136206896551725e-08, "loss": 0.0859, "step": 296000 }, { "epoch": 2.91, "grad_norm": 10.673406600952148, "learning_rate": 7.093103448275863e-08, "loss": 0.1397, "step": 296025 }, { "epoch": 2.91, "grad_norm": 10.828954696655273, "learning_rate": 7.05e-08, "loss": 0.0596, "step": 296050 }, { "epoch": 2.91, "grad_norm": 0.5492637157440186, "learning_rate": 7.006896551724138e-08, "loss": 0.1848, "step": 296075 }, { "epoch": 2.91, "grad_norm": 11.320109367370605, "learning_rate": 6.963793103448277e-08, "loss": 0.1032, "step": 296100 }, { "epoch": 2.91, "grad_norm": 2.0367605686187744, "learning_rate": 6.920689655172415e-08, "loss": 0.1417, "step": 296125 }, { "epoch": 2.91, "grad_norm": 29.94042205810547, "learning_rate": 6.877586206896552e-08, "loss": 0.1279, "step": 296150 }, { "epoch": 2.91, "grad_norm": 4.024247169494629, "learning_rate": 6.83448275862069e-08, "loss": 0.1595, "step": 296175 }, { "epoch": 2.91, "grad_norm": 6.3494873046875, "learning_rate": 6.791379310344828e-08, "loss": 0.0834, "step": 296200 }, { "epoch": 2.91, "grad_norm": 1.736323595046997, "learning_rate": 6.748275862068966e-08, "loss": 0.1694, "step": 296225 }, { "epoch": 2.91, "grad_norm": 8.204748153686523, "learning_rate": 6.705172413793105e-08, "loss": 0.0739, "step": 296250 }, { "epoch": 2.91, "grad_norm": 2.4987354278564453, "learning_rate": 6.662068965517241e-08, "loss": 0.164, "step": 296275 }, { "epoch": 2.91, "grad_norm": 14.86589527130127, "learning_rate": 6.61896551724138e-08, "loss": 0.1127, "step": 296300 }, { "epoch": 2.91, "grad_norm": 3.3350517749786377, "learning_rate": 6.575862068965518e-08, "loss": 0.1525, "step": 296325 }, { "epoch": 2.91, "grad_norm": 17.13681411743164, "learning_rate": 6.532758620689657e-08, "loss": 0.085, "step": 296350 }, { "epoch": 2.91, "grad_norm": 7.541744232177734, "learning_rate": 6.489655172413793e-08, "loss": 0.2067, "step": 296375 }, { "epoch": 2.91, "grad_norm": 11.138779640197754, "learning_rate": 6.446551724137932e-08, "loss": 0.0952, "step": 296400 }, { "epoch": 2.91, "grad_norm": 5.871278762817383, "learning_rate": 6.40344827586207e-08, "loss": 0.171, "step": 296425 }, { "epoch": 2.91, "grad_norm": 13.500067710876465, "learning_rate": 6.360344827586208e-08, "loss": 0.1013, "step": 296450 }, { "epoch": 2.91, "grad_norm": 5.0185651779174805, "learning_rate": 6.317241379310345e-08, "loss": 0.1709, "step": 296475 }, { "epoch": 2.92, "grad_norm": 13.210301399230957, "learning_rate": 6.274137931034483e-08, "loss": 0.0907, "step": 296500 }, { "epoch": 2.92, "grad_norm": 4.1737236976623535, "learning_rate": 6.23103448275862e-08, "loss": 0.1506, "step": 296525 }, { "epoch": 2.92, "grad_norm": 6.37442684173584, "learning_rate": 6.187931034482758e-08, "loss": 0.0916, "step": 296550 }, { "epoch": 2.92, "grad_norm": 2.2315990924835205, "learning_rate": 6.144827586206897e-08, "loss": 0.158, "step": 296575 }, { "epoch": 2.92, "grad_norm": 12.447103500366211, "learning_rate": 6.101724137931035e-08, "loss": 0.0921, "step": 296600 }, { "epoch": 2.92, "grad_norm": 5.14431095123291, "learning_rate": 6.060344827586207e-08, "loss": 0.1922, "step": 296625 }, { "epoch": 2.92, "grad_norm": 9.405245780944824, "learning_rate": 6.017241379310345e-08, "loss": 0.0884, "step": 296650 }, { "epoch": 2.92, "grad_norm": 3.381943702697754, "learning_rate": 5.974137931034483e-08, "loss": 0.161, "step": 296675 }, { "epoch": 2.92, "grad_norm": 11.711341857910156, "learning_rate": 5.9310344827586206e-08, "loss": 0.0728, "step": 296700 }, { "epoch": 2.92, "grad_norm": 3.496731758117676, "learning_rate": 5.887931034482759e-08, "loss": 0.1492, "step": 296725 }, { "epoch": 2.92, "grad_norm": 8.849160194396973, "learning_rate": 5.844827586206897e-08, "loss": 0.0819, "step": 296750 }, { "epoch": 2.92, "grad_norm": 5.0755391120910645, "learning_rate": 5.8017241379310344e-08, "loss": 0.1689, "step": 296775 }, { "epoch": 2.92, "grad_norm": 4.831711292266846, "learning_rate": 5.758620689655173e-08, "loss": 0.0896, "step": 296800 }, { "epoch": 2.92, "grad_norm": 5.444356441497803, "learning_rate": 5.715517241379311e-08, "loss": 0.164, "step": 296825 }, { "epoch": 2.92, "grad_norm": 12.753169059753418, "learning_rate": 5.672413793103449e-08, "loss": 0.1196, "step": 296850 }, { "epoch": 2.92, "grad_norm": 2.4016940593719482, "learning_rate": 5.6293103448275865e-08, "loss": 0.1566, "step": 296875 }, { "epoch": 2.92, "grad_norm": 11.474163055419922, "learning_rate": 5.586206896551725e-08, "loss": 0.0755, "step": 296900 }, { "epoch": 2.92, "grad_norm": 3.55692195892334, "learning_rate": 5.5431034482758626e-08, "loss": 0.1599, "step": 296925 }, { "epoch": 2.92, "grad_norm": 15.72424602508545, "learning_rate": 5.5e-08, "loss": 0.095, "step": 296950 }, { "epoch": 2.92, "grad_norm": 4.927239418029785, "learning_rate": 5.4568965517241386e-08, "loss": 0.1685, "step": 296975 }, { "epoch": 2.92, "grad_norm": 13.68856430053711, "learning_rate": 5.4137931034482764e-08, "loss": 0.0835, "step": 297000 }, { "epoch": 2.92, "grad_norm": 4.720460891723633, "learning_rate": 5.370689655172414e-08, "loss": 0.1699, "step": 297025 }, { "epoch": 2.92, "grad_norm": 6.850702285766602, "learning_rate": 5.327586206896552e-08, "loss": 0.0986, "step": 297050 }, { "epoch": 2.92, "grad_norm": 8.317587852478027, "learning_rate": 5.28448275862069e-08, "loss": 0.1753, "step": 297075 }, { "epoch": 2.92, "grad_norm": 5.139171600341797, "learning_rate": 5.2413793103448285e-08, "loss": 0.1121, "step": 297100 }, { "epoch": 2.92, "grad_norm": 4.189998149871826, "learning_rate": 5.1982758620689655e-08, "loss": 0.1759, "step": 297125 }, { "epoch": 2.92, "grad_norm": 8.865562438964844, "learning_rate": 5.155172413793104e-08, "loss": 0.0925, "step": 297150 }, { "epoch": 2.92, "grad_norm": 1.8262745141983032, "learning_rate": 5.112068965517242e-08, "loss": 0.1638, "step": 297175 }, { "epoch": 2.92, "grad_norm": 13.61965274810791, "learning_rate": 5.068965517241379e-08, "loss": 0.093, "step": 297200 }, { "epoch": 2.92, "grad_norm": 1.3118914365768433, "learning_rate": 5.025862068965518e-08, "loss": 0.1522, "step": 297225 }, { "epoch": 2.92, "grad_norm": 8.801728248596191, "learning_rate": 4.982758620689656e-08, "loss": 0.0607, "step": 297250 }, { "epoch": 2.92, "grad_norm": 5.964912414550781, "learning_rate": 4.939655172413793e-08, "loss": 0.1265, "step": 297275 }, { "epoch": 2.92, "grad_norm": 9.247302055358887, "learning_rate": 4.8965517241379315e-08, "loss": 0.0819, "step": 297300 }, { "epoch": 2.92, "grad_norm": 3.353518486022949, "learning_rate": 4.853448275862069e-08, "loss": 0.1656, "step": 297325 }, { "epoch": 2.92, "grad_norm": 11.570937156677246, "learning_rate": 4.8103448275862075e-08, "loss": 0.0735, "step": 297350 }, { "epoch": 2.92, "grad_norm": 4.92053747177124, "learning_rate": 4.767241379310345e-08, "loss": 0.1626, "step": 297375 }, { "epoch": 2.92, "grad_norm": 5.76597785949707, "learning_rate": 4.724137931034483e-08, "loss": 0.0764, "step": 297400 }, { "epoch": 2.92, "grad_norm": 0.6934220790863037, "learning_rate": 4.681034482758621e-08, "loss": 0.1607, "step": 297425 }, { "epoch": 2.92, "grad_norm": 9.208626747131348, "learning_rate": 4.637931034482759e-08, "loss": 0.0918, "step": 297450 }, { "epoch": 2.92, "grad_norm": 7.154460430145264, "learning_rate": 4.594827586206897e-08, "loss": 0.2052, "step": 297475 }, { "epoch": 2.93, "grad_norm": 17.150705337524414, "learning_rate": 4.551724137931035e-08, "loss": 0.104, "step": 297500 }, { "epoch": 2.93, "grad_norm": 5.068700313568115, "learning_rate": 4.508620689655172e-08, "loss": 0.1785, "step": 297525 }, { "epoch": 2.93, "grad_norm": 11.936406135559082, "learning_rate": 4.4655172413793105e-08, "loss": 0.0896, "step": 297550 }, { "epoch": 2.93, "grad_norm": 4.324743747711182, "learning_rate": 4.422413793103449e-08, "loss": 0.1664, "step": 297575 }, { "epoch": 2.93, "grad_norm": 16.310768127441406, "learning_rate": 4.379310344827587e-08, "loss": 0.0932, "step": 297600 }, { "epoch": 2.93, "grad_norm": 6.311774253845215, "learning_rate": 4.336206896551724e-08, "loss": 0.1798, "step": 297625 }, { "epoch": 2.93, "grad_norm": 7.767374038696289, "learning_rate": 4.2931034482758626e-08, "loss": 0.0869, "step": 297650 }, { "epoch": 2.93, "grad_norm": 2.7620785236358643, "learning_rate": 4.2500000000000003e-08, "loss": 0.1823, "step": 297675 }, { "epoch": 2.93, "grad_norm": 11.528992652893066, "learning_rate": 4.206896551724138e-08, "loss": 0.1078, "step": 297700 }, { "epoch": 2.93, "grad_norm": 6.005678176879883, "learning_rate": 4.1637931034482764e-08, "loss": 0.1886, "step": 297725 }, { "epoch": 2.93, "grad_norm": 9.768656730651855, "learning_rate": 4.120689655172414e-08, "loss": 0.0825, "step": 297750 }, { "epoch": 2.93, "grad_norm": 4.668910503387451, "learning_rate": 4.077586206896552e-08, "loss": 0.1895, "step": 297775 }, { "epoch": 2.93, "grad_norm": 11.150866508483887, "learning_rate": 4.03448275862069e-08, "loss": 0.0863, "step": 297800 }, { "epoch": 2.93, "grad_norm": 7.084414482116699, "learning_rate": 3.991379310344828e-08, "loss": 0.1463, "step": 297825 }, { "epoch": 2.93, "grad_norm": 20.00340461730957, "learning_rate": 3.948275862068966e-08, "loss": 0.1105, "step": 297850 }, { "epoch": 2.93, "grad_norm": 6.262966632843018, "learning_rate": 3.905172413793103e-08, "loss": 0.1608, "step": 297875 }, { "epoch": 2.93, "grad_norm": 9.487909317016602, "learning_rate": 3.8620689655172417e-08, "loss": 0.1095, "step": 297900 }, { "epoch": 2.93, "grad_norm": 3.678886890411377, "learning_rate": 3.8189655172413794e-08, "loss": 0.1851, "step": 297925 }, { "epoch": 2.93, "grad_norm": 10.688139915466309, "learning_rate": 3.775862068965518e-08, "loss": 0.0964, "step": 297950 }, { "epoch": 2.93, "grad_norm": 5.102015495300293, "learning_rate": 3.7327586206896554e-08, "loss": 0.1576, "step": 297975 }, { "epoch": 2.93, "grad_norm": 18.443208694458008, "learning_rate": 3.689655172413793e-08, "loss": 0.1332, "step": 298000 }, { "epoch": 2.93, "grad_norm": 4.408807754516602, "learning_rate": 3.6465517241379315e-08, "loss": 0.1885, "step": 298025 }, { "epoch": 2.93, "grad_norm": 12.332672119140625, "learning_rate": 3.603448275862069e-08, "loss": 0.0784, "step": 298050 }, { "epoch": 2.93, "grad_norm": 5.5166521072387695, "learning_rate": 3.5603448275862076e-08, "loss": 0.1683, "step": 298075 }, { "epoch": 2.93, "grad_norm": 12.162002563476562, "learning_rate": 3.517241379310345e-08, "loss": 0.0794, "step": 298100 }, { "epoch": 2.93, "grad_norm": 1.774458885192871, "learning_rate": 3.474137931034483e-08, "loss": 0.1473, "step": 298125 }, { "epoch": 2.93, "grad_norm": 12.144302368164062, "learning_rate": 3.431034482758621e-08, "loss": 0.1248, "step": 298150 }, { "epoch": 2.93, "grad_norm": 4.000005722045898, "learning_rate": 3.387931034482759e-08, "loss": 0.2138, "step": 298175 }, { "epoch": 2.93, "grad_norm": 6.972421646118164, "learning_rate": 3.344827586206897e-08, "loss": 0.0949, "step": 298200 }, { "epoch": 2.93, "grad_norm": 3.663241147994995, "learning_rate": 3.3017241379310345e-08, "loss": 0.1628, "step": 298225 }, { "epoch": 2.93, "grad_norm": 13.880690574645996, "learning_rate": 3.258620689655172e-08, "loss": 0.0809, "step": 298250 }, { "epoch": 2.93, "grad_norm": 4.352648735046387, "learning_rate": 3.2155172413793105e-08, "loss": 0.164, "step": 298275 }, { "epoch": 2.93, "grad_norm": 13.263358116149902, "learning_rate": 3.172413793103448e-08, "loss": 0.0982, "step": 298300 }, { "epoch": 2.93, "grad_norm": 8.948261260986328, "learning_rate": 3.1293103448275866e-08, "loss": 0.1574, "step": 298325 }, { "epoch": 2.93, "grad_norm": 11.337494850158691, "learning_rate": 3.086206896551724e-08, "loss": 0.0922, "step": 298350 }, { "epoch": 2.93, "grad_norm": 2.1432151794433594, "learning_rate": 3.043103448275862e-08, "loss": 0.1449, "step": 298375 }, { "epoch": 2.93, "grad_norm": 12.110801696777344, "learning_rate": 3.0000000000000004e-08, "loss": 0.0952, "step": 298400 }, { "epoch": 2.93, "grad_norm": 4.11094856262207, "learning_rate": 2.956896551724138e-08, "loss": 0.1658, "step": 298425 }, { "epoch": 2.93, "grad_norm": 6.718938827514648, "learning_rate": 2.913793103448276e-08, "loss": 0.0795, "step": 298450 }, { "epoch": 2.93, "grad_norm": 5.248793125152588, "learning_rate": 2.8706896551724142e-08, "loss": 0.194, "step": 298475 }, { "epoch": 2.93, "grad_norm": 9.308308601379395, "learning_rate": 2.827586206896552e-08, "loss": 0.1162, "step": 298500 }, { "epoch": 2.94, "grad_norm": 4.5243072509765625, "learning_rate": 2.78448275862069e-08, "loss": 0.1911, "step": 298525 }, { "epoch": 2.94, "grad_norm": 17.18242835998535, "learning_rate": 2.7413793103448276e-08, "loss": 0.1051, "step": 298550 }, { "epoch": 2.94, "grad_norm": 3.025996208190918, "learning_rate": 2.698275862068966e-08, "loss": 0.1921, "step": 298575 }, { "epoch": 2.94, "grad_norm": 8.595040321350098, "learning_rate": 2.6551724137931037e-08, "loss": 0.0871, "step": 298600 }, { "epoch": 2.94, "grad_norm": 2.908895969390869, "learning_rate": 2.6120689655172414e-08, "loss": 0.1962, "step": 298625 }, { "epoch": 2.94, "grad_norm": 14.426276206970215, "learning_rate": 2.5689655172413794e-08, "loss": 0.0806, "step": 298650 }, { "epoch": 2.94, "grad_norm": 3.365017890930176, "learning_rate": 2.5258620689655175e-08, "loss": 0.1398, "step": 298675 }, { "epoch": 2.94, "grad_norm": 14.527469635009766, "learning_rate": 2.4827586206896555e-08, "loss": 0.0859, "step": 298700 }, { "epoch": 2.94, "grad_norm": 4.643472671508789, "learning_rate": 2.4396551724137932e-08, "loss": 0.1523, "step": 298725 }, { "epoch": 2.94, "grad_norm": 8.82208251953125, "learning_rate": 2.396551724137931e-08, "loss": 0.101, "step": 298750 }, { "epoch": 2.94, "grad_norm": 3.4180538654327393, "learning_rate": 2.3534482758620693e-08, "loss": 0.178, "step": 298775 }, { "epoch": 2.94, "grad_norm": 19.146263122558594, "learning_rate": 2.310344827586207e-08, "loss": 0.1074, "step": 298800 }, { "epoch": 2.94, "grad_norm": 5.784060001373291, "learning_rate": 2.267241379310345e-08, "loss": 0.2029, "step": 298825 }, { "epoch": 2.94, "grad_norm": 8.054841041564941, "learning_rate": 2.224137931034483e-08, "loss": 0.1319, "step": 298850 }, { "epoch": 2.94, "grad_norm": 3.364631414413452, "learning_rate": 2.1810344827586208e-08, "loss": 0.2066, "step": 298875 }, { "epoch": 2.94, "grad_norm": 5.61177396774292, "learning_rate": 2.1379310344827588e-08, "loss": 0.1026, "step": 298900 }, { "epoch": 2.94, "grad_norm": 3.7072927951812744, "learning_rate": 2.0948275862068965e-08, "loss": 0.1542, "step": 298925 }, { "epoch": 2.94, "grad_norm": 13.257627487182617, "learning_rate": 2.051724137931035e-08, "loss": 0.1116, "step": 298950 }, { "epoch": 2.94, "grad_norm": 4.126539707183838, "learning_rate": 2.0086206896551726e-08, "loss": 0.151, "step": 298975 }, { "epoch": 2.94, "grad_norm": 17.12653160095215, "learning_rate": 1.9655172413793103e-08, "loss": 0.1046, "step": 299000 }, { "epoch": 2.94, "grad_norm": 4.77101993560791, "learning_rate": 1.9224137931034486e-08, "loss": 0.1802, "step": 299025 }, { "epoch": 2.94, "grad_norm": 5.526018142700195, "learning_rate": 1.8793103448275863e-08, "loss": 0.0538, "step": 299050 }, { "epoch": 2.94, "grad_norm": 3.5518054962158203, "learning_rate": 1.8362068965517244e-08, "loss": 0.1975, "step": 299075 }, { "epoch": 2.94, "grad_norm": 13.33297061920166, "learning_rate": 1.793103448275862e-08, "loss": 0.1134, "step": 299100 }, { "epoch": 2.94, "grad_norm": 2.5079433917999268, "learning_rate": 1.75e-08, "loss": 0.1696, "step": 299125 }, { "epoch": 2.94, "grad_norm": 14.533392906188965, "learning_rate": 1.706896551724138e-08, "loss": 0.091, "step": 299150 }, { "epoch": 2.94, "grad_norm": 5.561295509338379, "learning_rate": 1.663793103448276e-08, "loss": 0.1729, "step": 299175 }, { "epoch": 2.94, "grad_norm": 9.978918075561523, "learning_rate": 1.620689655172414e-08, "loss": 0.1077, "step": 299200 }, { "epoch": 2.94, "grad_norm": 3.666090726852417, "learning_rate": 1.5793103448275863e-08, "loss": 0.1523, "step": 299225 }, { "epoch": 2.94, "grad_norm": 13.53764533996582, "learning_rate": 1.5362068965517243e-08, "loss": 0.1039, "step": 299250 }, { "epoch": 2.94, "grad_norm": 2.5799200534820557, "learning_rate": 1.4931034482758624e-08, "loss": 0.1592, "step": 299275 }, { "epoch": 2.94, "grad_norm": 9.657661437988281, "learning_rate": 1.45e-08, "loss": 0.0719, "step": 299300 }, { "epoch": 2.94, "grad_norm": 3.594834089279175, "learning_rate": 1.406896551724138e-08, "loss": 0.1944, "step": 299325 }, { "epoch": 2.94, "grad_norm": 5.160403251647949, "learning_rate": 1.363793103448276e-08, "loss": 0.0969, "step": 299350 }, { "epoch": 2.94, "grad_norm": 4.878444194793701, "learning_rate": 1.3206896551724139e-08, "loss": 0.1828, "step": 299375 }, { "epoch": 2.94, "grad_norm": 18.92526626586914, "learning_rate": 1.2775862068965519e-08, "loss": 0.0911, "step": 299400 }, { "epoch": 2.94, "grad_norm": 3.67526912689209, "learning_rate": 1.2344827586206896e-08, "loss": 0.1613, "step": 299425 }, { "epoch": 2.94, "grad_norm": 13.152149200439453, "learning_rate": 1.1913793103448276e-08, "loss": 0.0943, "step": 299450 }, { "epoch": 2.94, "grad_norm": 3.5137858390808105, "learning_rate": 1.1482758620689657e-08, "loss": 0.1997, "step": 299475 }, { "epoch": 2.94, "grad_norm": 9.350616455078125, "learning_rate": 1.1051724137931035e-08, "loss": 0.0841, "step": 299500 }, { "epoch": 2.94, "grad_norm": 3.1646881103515625, "learning_rate": 1.0620689655172416e-08, "loss": 0.2057, "step": 299525 }, { "epoch": 2.95, "grad_norm": 7.395025730133057, "learning_rate": 1.0189655172413793e-08, "loss": 0.0715, "step": 299550 }, { "epoch": 2.95, "grad_norm": 10.633687019348145, "learning_rate": 9.758620689655173e-09, "loss": 0.1835, "step": 299575 }, { "epoch": 2.95, "grad_norm": 11.987170219421387, "learning_rate": 9.327586206896552e-09, "loss": 0.0718, "step": 299600 }, { "epoch": 2.95, "grad_norm": 3.026431083679199, "learning_rate": 8.896551724137932e-09, "loss": 0.1787, "step": 299625 }, { "epoch": 2.95, "grad_norm": 9.663456916809082, "learning_rate": 8.465517241379311e-09, "loss": 0.0972, "step": 299650 }, { "epoch": 2.95, "grad_norm": 7.4209370613098145, "learning_rate": 8.034482758620691e-09, "loss": 0.142, "step": 299675 }, { "epoch": 2.95, "grad_norm": 8.76673698425293, "learning_rate": 7.603448275862068e-09, "loss": 0.0837, "step": 299700 }, { "epoch": 2.95, "grad_norm": 4.35598087310791, "learning_rate": 7.172413793103449e-09, "loss": 0.1373, "step": 299725 }, { "epoch": 2.95, "grad_norm": 9.001477241516113, "learning_rate": 6.741379310344828e-09, "loss": 0.0847, "step": 299750 }, { "epoch": 2.95, "grad_norm": 7.1366496086120605, "learning_rate": 6.310344827586207e-09, "loss": 0.2104, "step": 299775 }, { "epoch": 2.95, "grad_norm": 11.962371826171875, "learning_rate": 5.879310344827587e-09, "loss": 0.1039, "step": 299800 }, { "epoch": 2.95, "grad_norm": 6.439513206481934, "learning_rate": 5.448275862068965e-09, "loss": 0.194, "step": 299825 }, { "epoch": 2.95, "grad_norm": 18.3298397064209, "learning_rate": 5.0172413793103455e-09, "loss": 0.1196, "step": 299850 }, { "epoch": 2.95, "grad_norm": 4.414209842681885, "learning_rate": 4.586206896551724e-09, "loss": 0.1653, "step": 299875 }, { "epoch": 2.95, "grad_norm": 9.826563835144043, "learning_rate": 4.155172413793104e-09, "loss": 0.0776, "step": 299900 }, { "epoch": 2.95, "grad_norm": 8.502961158752441, "learning_rate": 3.7241379310344832e-09, "loss": 0.1482, "step": 299925 }, { "epoch": 2.95, "grad_norm": 8.497183799743652, "learning_rate": 3.2931034482758624e-09, "loss": 0.0654, "step": 299950 }, { "epoch": 2.95, "grad_norm": 1.7729798555374146, "learning_rate": 2.8620689655172415e-09, "loss": 0.1672, "step": 299975 }, { "epoch": 2.95, "grad_norm": 5.470938205718994, "learning_rate": 2.431034482758621e-09, "loss": 0.0913, "step": 300000 }, { "epoch": 2.95, "eval_loss": 0.6491209864616394, "eval_runtime": 6034.7585, "eval_samples_per_second": 1.569, "eval_steps_per_second": 0.196, "eval_wer": 0.11473958668640959, "step": 300000 }, { "epoch": 2.95, "step": 300000, "total_flos": 2.449451778048e+21, "train_loss": 0.033957572368780775, "train_runtime": 319106.224, "train_samples_per_second": 7.521, "train_steps_per_second": 0.94 } ], "logging_steps": 25, "max_steps": 300000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 5000, "total_flos": 2.449451778048e+21, "train_batch_size": 8, "trial_name": null, "trial_params": null }