{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.2249183938333266, "eval_steps": 10000, "global_step": 82000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 19.548603911230614, "learning_rate": 3e-09, "loss": 1.3664, "step": 25 }, { "epoch": 0.0, "grad_norm": 17.74263326459735, "learning_rate": 1.55e-08, "loss": 1.3938, "step": 50 }, { "epoch": 0.0, "grad_norm": 19.768157816871117, "learning_rate": 2.8000000000000003e-08, "loss": 1.3875, "step": 75 }, { "epoch": 0.0, "grad_norm": 18.18791431777122, "learning_rate": 4.05e-08, "loss": 1.2869, "step": 100 }, { "epoch": 0.0, "grad_norm": 20.288361662728292, "learning_rate": 5.3000000000000005e-08, "loss": 1.3836, "step": 125 }, { "epoch": 0.01, "grad_norm": 19.62961424039089, "learning_rate": 6.550000000000001e-08, "loss": 1.3198, "step": 150 }, { "epoch": 0.01, "grad_norm": 18.510724860772605, "learning_rate": 7.8e-08, "loss": 1.3883, "step": 175 }, { "epoch": 0.01, "grad_norm": 18.00659579211933, "learning_rate": 9.050000000000001e-08, "loss": 1.2896, "step": 200 }, { "epoch": 0.01, "grad_norm": 17.149030358259694, "learning_rate": 1.0300000000000001e-07, "loss": 1.2758, "step": 225 }, { "epoch": 0.01, "grad_norm": 17.719461286476776, "learning_rate": 1.1550000000000001e-07, "loss": 1.1934, "step": 250 }, { "epoch": 0.01, "grad_norm": 18.479532395253678, "learning_rate": 1.28e-07, "loss": 1.2667, "step": 275 }, { "epoch": 0.01, "grad_norm": 18.189268236304354, "learning_rate": 1.4050000000000002e-07, "loss": 1.1188, "step": 300 }, { "epoch": 0.01, "grad_norm": 18.485150666624385, "learning_rate": 1.53e-07, "loss": 1.0843, "step": 325 }, { "epoch": 0.01, "grad_norm": 16.161860822592118, "learning_rate": 1.655e-07, "loss": 0.9748, "step": 350 }, { "epoch": 0.01, "grad_norm": 16.77864844044536, "learning_rate": 1.78e-07, "loss": 0.7618, "step": 375 }, { "epoch": 0.02, "grad_norm": 13.284839665193802, "learning_rate": 1.9050000000000002e-07, "loss": 0.6402, "step": 400 }, { "epoch": 0.02, "grad_norm": 12.563585737686541, "learning_rate": 2.03e-07, "loss": 0.6147, "step": 425 }, { "epoch": 0.02, "grad_norm": 14.195526034976105, "learning_rate": 2.1550000000000002e-07, "loss": 0.5645, "step": 450 }, { "epoch": 0.02, "grad_norm": 15.813975597971679, "learning_rate": 2.2800000000000003e-07, "loss": 0.564, "step": 475 }, { "epoch": 0.02, "grad_norm": 13.223184796437847, "learning_rate": 2.405e-07, "loss": 0.5439, "step": 500 }, { "epoch": 0.02, "grad_norm": 11.65715823434448, "learning_rate": 2.53e-07, "loss": 0.5262, "step": 525 }, { "epoch": 0.02, "grad_norm": 12.140278336499202, "learning_rate": 2.6550000000000004e-07, "loss": 0.4831, "step": 550 }, { "epoch": 0.02, "grad_norm": 10.595125220315392, "learning_rate": 2.7800000000000003e-07, "loss": 0.4605, "step": 575 }, { "epoch": 0.02, "grad_norm": 10.287013706634347, "learning_rate": 2.905e-07, "loss": 0.4882, "step": 600 }, { "epoch": 0.02, "grad_norm": 11.461372129814126, "learning_rate": 3.0300000000000005e-07, "loss": 0.4616, "step": 625 }, { "epoch": 0.03, "grad_norm": 9.492888534763829, "learning_rate": 3.1550000000000004e-07, "loss": 0.4572, "step": 650 }, { "epoch": 0.03, "grad_norm": 11.17219796215868, "learning_rate": 3.280000000000001e-07, "loss": 0.4422, "step": 675 }, { "epoch": 0.03, "grad_norm": 9.431269877118583, "learning_rate": 3.405e-07, "loss": 0.4441, "step": 700 }, { "epoch": 0.03, "grad_norm": 9.077076398765083, "learning_rate": 3.53e-07, "loss": 0.4324, "step": 725 }, { "epoch": 0.03, "grad_norm": 9.42463034481312, "learning_rate": 3.6550000000000004e-07, "loss": 0.4757, "step": 750 }, { "epoch": 0.03, "grad_norm": 9.002220474801542, "learning_rate": 3.78e-07, "loss": 0.4235, "step": 775 }, { "epoch": 0.03, "grad_norm": 9.249177989263307, "learning_rate": 3.9050000000000006e-07, "loss": 0.3844, "step": 800 }, { "epoch": 0.03, "grad_norm": 9.222274718732235, "learning_rate": 4.0300000000000005e-07, "loss": 0.4303, "step": 825 }, { "epoch": 0.03, "grad_norm": 9.29593645258072, "learning_rate": 4.155e-07, "loss": 0.3783, "step": 850 }, { "epoch": 0.03, "grad_norm": 8.0820275737306, "learning_rate": 4.28e-07, "loss": 0.3866, "step": 875 }, { "epoch": 0.04, "grad_norm": 9.284235367838265, "learning_rate": 4.405e-07, "loss": 0.3863, "step": 900 }, { "epoch": 0.04, "grad_norm": 7.990489183242782, "learning_rate": 4.5300000000000005e-07, "loss": 0.3868, "step": 925 }, { "epoch": 0.04, "grad_norm": 10.826149813494483, "learning_rate": 4.6550000000000003e-07, "loss": 0.3962, "step": 950 }, { "epoch": 0.04, "grad_norm": 12.652924783202419, "learning_rate": 4.78e-07, "loss": 0.4151, "step": 975 }, { "epoch": 0.04, "grad_norm": 7.231122893213122, "learning_rate": 4.905000000000001e-07, "loss": 0.4139, "step": 1000 }, { "epoch": 0.04, "grad_norm": 8.284357637366583, "learning_rate": 5.03e-07, "loss": 0.4003, "step": 1025 }, { "epoch": 0.04, "grad_norm": 9.764967682696836, "learning_rate": 5.155e-07, "loss": 0.3424, "step": 1050 }, { "epoch": 0.04, "grad_norm": 8.623487096739643, "learning_rate": 5.280000000000001e-07, "loss": 0.3726, "step": 1075 }, { "epoch": 0.04, "grad_norm": 9.118786193989378, "learning_rate": 5.405000000000001e-07, "loss": 0.3628, "step": 1100 }, { "epoch": 0.04, "grad_norm": 8.266772083450553, "learning_rate": 5.53e-07, "loss": 0.3585, "step": 1125 }, { "epoch": 0.05, "grad_norm": 7.9161160573400835, "learning_rate": 5.655e-07, "loss": 0.3638, "step": 1150 }, { "epoch": 0.05, "grad_norm": 8.957547049197, "learning_rate": 5.78e-07, "loss": 0.3543, "step": 1175 }, { "epoch": 0.05, "grad_norm": 7.64987127577368, "learning_rate": 5.905e-07, "loss": 0.3635, "step": 1200 }, { "epoch": 0.05, "grad_norm": 7.6784759653959345, "learning_rate": 6.030000000000001e-07, "loss": 0.3822, "step": 1225 }, { "epoch": 0.05, "grad_norm": 8.353852424599333, "learning_rate": 6.155000000000001e-07, "loss": 0.3715, "step": 1250 }, { "epoch": 0.05, "grad_norm": 7.886970263184239, "learning_rate": 6.28e-07, "loss": 0.3751, "step": 1275 }, { "epoch": 0.05, "grad_norm": 7.1464634900527635, "learning_rate": 6.405e-07, "loss": 0.3637, "step": 1300 }, { "epoch": 0.05, "grad_norm": 8.365126116348344, "learning_rate": 6.53e-07, "loss": 0.3717, "step": 1325 }, { "epoch": 0.05, "grad_norm": 8.264665698841982, "learning_rate": 6.655e-07, "loss": 0.3815, "step": 1350 }, { "epoch": 0.05, "grad_norm": 8.817525071240404, "learning_rate": 6.78e-07, "loss": 0.3192, "step": 1375 }, { "epoch": 0.06, "grad_norm": 8.541821815933655, "learning_rate": 6.905000000000001e-07, "loss": 0.3682, "step": 1400 }, { "epoch": 0.06, "grad_norm": 8.775244243834967, "learning_rate": 7.030000000000001e-07, "loss": 0.3583, "step": 1425 }, { "epoch": 0.06, "grad_norm": 8.699420120871023, "learning_rate": 7.155000000000001e-07, "loss": 0.353, "step": 1450 }, { "epoch": 0.06, "grad_norm": 7.3256513159082886, "learning_rate": 7.280000000000001e-07, "loss": 0.3475, "step": 1475 }, { "epoch": 0.06, "grad_norm": 8.954227102446838, "learning_rate": 7.405000000000002e-07, "loss": 0.3531, "step": 1500 }, { "epoch": 0.06, "grad_norm": 7.268304372701325, "learning_rate": 7.530000000000001e-07, "loss": 0.3641, "step": 1525 }, { "epoch": 0.06, "grad_norm": 8.594146689998704, "learning_rate": 7.655000000000001e-07, "loss": 0.3539, "step": 1550 }, { "epoch": 0.06, "grad_norm": 8.380277920462982, "learning_rate": 7.78e-07, "loss": 0.3398, "step": 1575 }, { "epoch": 0.06, "grad_norm": 8.249645134391981, "learning_rate": 7.905e-07, "loss": 0.3317, "step": 1600 }, { "epoch": 0.06, "grad_norm": 9.168651318042285, "learning_rate": 8.03e-07, "loss": 0.3347, "step": 1625 }, { "epoch": 0.06, "grad_norm": 7.779448193291338, "learning_rate": 8.155000000000001e-07, "loss": 0.3574, "step": 1650 }, { "epoch": 0.07, "grad_norm": 8.413939794725913, "learning_rate": 8.280000000000001e-07, "loss": 0.3444, "step": 1675 }, { "epoch": 0.07, "grad_norm": 8.011851874436536, "learning_rate": 8.405e-07, "loss": 0.346, "step": 1700 }, { "epoch": 0.07, "grad_norm": 7.4563682729389615, "learning_rate": 8.53e-07, "loss": 0.3496, "step": 1725 }, { "epoch": 0.07, "grad_norm": 8.042812209726682, "learning_rate": 8.655000000000001e-07, "loss": 0.3251, "step": 1750 }, { "epoch": 0.07, "grad_norm": 8.982650134613024, "learning_rate": 8.780000000000001e-07, "loss": 0.3312, "step": 1775 }, { "epoch": 0.07, "grad_norm": 6.208414233430044, "learning_rate": 8.905000000000001e-07, "loss": 0.3214, "step": 1800 }, { "epoch": 0.07, "grad_norm": 7.3406319348355105, "learning_rate": 9.030000000000001e-07, "loss": 0.351, "step": 1825 }, { "epoch": 0.07, "grad_norm": 10.869539965379989, "learning_rate": 9.155000000000002e-07, "loss": 0.3277, "step": 1850 }, { "epoch": 0.07, "grad_norm": 7.619686331747831, "learning_rate": 9.28e-07, "loss": 0.3419, "step": 1875 }, { "epoch": 0.07, "grad_norm": 7.690019286737048, "learning_rate": 9.405e-07, "loss": 0.3313, "step": 1900 }, { "epoch": 0.08, "grad_norm": 8.40223765276817, "learning_rate": 9.53e-07, "loss": 0.3259, "step": 1925 }, { "epoch": 0.08, "grad_norm": 9.092936931428083, "learning_rate": 9.655000000000001e-07, "loss": 0.325, "step": 1950 }, { "epoch": 0.08, "grad_norm": 8.59220421800555, "learning_rate": 9.78e-07, "loss": 0.3364, "step": 1975 }, { "epoch": 0.08, "grad_norm": 7.215626189268482, "learning_rate": 9.905e-07, "loss": 0.3372, "step": 2000 }, { "epoch": 0.08, "grad_norm": 7.7827726836036035, "learning_rate": 1.0030000000000002e-06, "loss": 0.2945, "step": 2025 }, { "epoch": 0.08, "grad_norm": 8.846888472808441, "learning_rate": 1.0155e-06, "loss": 0.3352, "step": 2050 }, { "epoch": 0.08, "grad_norm": 7.524600018661036, "learning_rate": 1.0280000000000002e-06, "loss": 0.325, "step": 2075 }, { "epoch": 0.08, "grad_norm": 8.029269384160894, "learning_rate": 1.0405e-06, "loss": 0.3445, "step": 2100 }, { "epoch": 0.08, "grad_norm": 6.541151743266427, "learning_rate": 1.0530000000000001e-06, "loss": 0.3487, "step": 2125 }, { "epoch": 0.08, "grad_norm": 9.597600892762339, "learning_rate": 1.0655000000000002e-06, "loss": 0.3384, "step": 2150 }, { "epoch": 0.09, "grad_norm": 8.130328659028095, "learning_rate": 1.078e-06, "loss": 0.3482, "step": 2175 }, { "epoch": 0.09, "grad_norm": 8.4990076386556, "learning_rate": 1.0905e-06, "loss": 0.3026, "step": 2200 }, { "epoch": 0.09, "grad_norm": 7.422295132082161, "learning_rate": 1.103e-06, "loss": 0.3057, "step": 2225 }, { "epoch": 0.09, "grad_norm": 8.017101554617902, "learning_rate": 1.1155e-06, "loss": 0.3233, "step": 2250 }, { "epoch": 0.09, "grad_norm": 8.25880061391202, "learning_rate": 1.128e-06, "loss": 0.3046, "step": 2275 }, { "epoch": 0.09, "grad_norm": 7.812137488778935, "learning_rate": 1.1405000000000001e-06, "loss": 0.3125, "step": 2300 }, { "epoch": 0.09, "grad_norm": 9.88024638341902, "learning_rate": 1.153e-06, "loss": 0.3322, "step": 2325 }, { "epoch": 0.09, "grad_norm": 9.480678112460733, "learning_rate": 1.1655000000000001e-06, "loss": 0.3506, "step": 2350 }, { "epoch": 0.09, "grad_norm": 7.625242578507233, "learning_rate": 1.1780000000000002e-06, "loss": 0.3081, "step": 2375 }, { "epoch": 0.09, "grad_norm": 8.471850154248505, "learning_rate": 1.1905e-06, "loss": 0.3411, "step": 2400 }, { "epoch": 0.1, "grad_norm": 7.700212579759201, "learning_rate": 1.2030000000000002e-06, "loss": 0.2955, "step": 2425 }, { "epoch": 0.1, "grad_norm": 7.640668754634842, "learning_rate": 1.2155e-06, "loss": 0.3147, "step": 2450 }, { "epoch": 0.1, "grad_norm": 6.142799254759943, "learning_rate": 1.2280000000000001e-06, "loss": 0.3223, "step": 2475 }, { "epoch": 0.1, "grad_norm": 9.273025415582895, "learning_rate": 1.2405e-06, "loss": 0.3183, "step": 2500 }, { "epoch": 0.1, "grad_norm": 8.15480159136244, "learning_rate": 1.2530000000000001e-06, "loss": 0.3042, "step": 2525 }, { "epoch": 0.1, "grad_norm": 7.986248278662067, "learning_rate": 1.2655e-06, "loss": 0.2898, "step": 2550 }, { "epoch": 0.1, "grad_norm": 6.9880022706315765, "learning_rate": 1.278e-06, "loss": 0.3342, "step": 2575 }, { "epoch": 0.1, "grad_norm": 6.797582197495177, "learning_rate": 1.2905000000000002e-06, "loss": 0.3204, "step": 2600 }, { "epoch": 0.1, "grad_norm": 9.63428878101681, "learning_rate": 1.303e-06, "loss": 0.3106, "step": 2625 }, { "epoch": 0.1, "grad_norm": 7.016874596706454, "learning_rate": 1.3155000000000002e-06, "loss": 0.3034, "step": 2650 }, { "epoch": 0.11, "grad_norm": 8.761116991346277, "learning_rate": 1.328e-06, "loss": 0.3125, "step": 2675 }, { "epoch": 0.11, "grad_norm": 9.147210364841152, "learning_rate": 1.3405000000000001e-06, "loss": 0.3212, "step": 2700 }, { "epoch": 0.11, "grad_norm": 7.198210069471468, "learning_rate": 1.3530000000000002e-06, "loss": 0.3005, "step": 2725 }, { "epoch": 0.11, "grad_norm": 9.62021097760392, "learning_rate": 1.3655e-06, "loss": 0.3095, "step": 2750 }, { "epoch": 0.11, "grad_norm": 8.498105116813674, "learning_rate": 1.3780000000000002e-06, "loss": 0.2968, "step": 2775 }, { "epoch": 0.11, "grad_norm": 9.096234425980665, "learning_rate": 1.3905000000000003e-06, "loss": 0.3013, "step": 2800 }, { "epoch": 0.11, "grad_norm": 9.529807578803446, "learning_rate": 1.4030000000000002e-06, "loss": 0.3285, "step": 2825 }, { "epoch": 0.11, "grad_norm": 8.090935769510644, "learning_rate": 1.4155000000000003e-06, "loss": 0.3186, "step": 2850 }, { "epoch": 0.11, "grad_norm": 8.142203590894807, "learning_rate": 1.4280000000000001e-06, "loss": 0.3283, "step": 2875 }, { "epoch": 0.11, "grad_norm": 7.5553560310705885, "learning_rate": 1.4405000000000002e-06, "loss": 0.3078, "step": 2900 }, { "epoch": 0.12, "grad_norm": 7.030443253782761, "learning_rate": 1.4530000000000003e-06, "loss": 0.3259, "step": 2925 }, { "epoch": 0.12, "grad_norm": 6.45791822784214, "learning_rate": 1.4655000000000002e-06, "loss": 0.3071, "step": 2950 }, { "epoch": 0.12, "grad_norm": 7.169665682759633, "learning_rate": 1.478e-06, "loss": 0.3059, "step": 2975 }, { "epoch": 0.12, "grad_norm": 9.903163712753615, "learning_rate": 1.4905e-06, "loss": 0.3201, "step": 3000 }, { "epoch": 0.12, "grad_norm": 3.4235199170734267, "learning_rate": 1.5025e-06, "loss": 0.3193, "step": 3025 }, { "epoch": 0.12, "grad_norm": 7.388562008472956, "learning_rate": 1.5145e-06, "loss": 0.3154, "step": 3050 }, { "epoch": 0.12, "grad_norm": 7.32478874239276, "learning_rate": 1.5270000000000002e-06, "loss": 0.3341, "step": 3075 }, { "epoch": 0.12, "grad_norm": 5.882897860372599, "learning_rate": 1.5395000000000003e-06, "loss": 0.2996, "step": 3100 }, { "epoch": 0.12, "grad_norm": 8.830461306218142, "learning_rate": 1.5520000000000001e-06, "loss": 0.2997, "step": 3125 }, { "epoch": 0.12, "grad_norm": 7.340070140671299, "learning_rate": 1.5645000000000002e-06, "loss": 0.2984, "step": 3150 }, { "epoch": 0.12, "grad_norm": 6.491658642899819, "learning_rate": 1.577e-06, "loss": 0.3165, "step": 3175 }, { "epoch": 0.13, "grad_norm": 8.69433147934288, "learning_rate": 1.5895000000000002e-06, "loss": 0.3031, "step": 3200 }, { "epoch": 0.13, "grad_norm": 8.314351227889603, "learning_rate": 1.6020000000000003e-06, "loss": 0.3018, "step": 3225 }, { "epoch": 0.13, "grad_norm": 8.310085290349688, "learning_rate": 1.6145000000000002e-06, "loss": 0.2997, "step": 3250 }, { "epoch": 0.13, "grad_norm": 10.305879482033044, "learning_rate": 1.6270000000000003e-06, "loss": 0.3054, "step": 3275 }, { "epoch": 0.13, "grad_norm": 7.40127326870802, "learning_rate": 1.6395000000000004e-06, "loss": 0.3134, "step": 3300 }, { "epoch": 0.13, "grad_norm": 6.850845319267836, "learning_rate": 1.6520000000000002e-06, "loss": 0.3062, "step": 3325 }, { "epoch": 0.13, "grad_norm": 7.047475032917652, "learning_rate": 1.6645e-06, "loss": 0.2863, "step": 3350 }, { "epoch": 0.13, "grad_norm": 8.725844976704757, "learning_rate": 1.677e-06, "loss": 0.2891, "step": 3375 }, { "epoch": 0.13, "grad_norm": 7.78153953615269, "learning_rate": 1.6895e-06, "loss": 0.3248, "step": 3400 }, { "epoch": 0.13, "grad_norm": 7.696888074356876, "learning_rate": 1.702e-06, "loss": 0.3188, "step": 3425 }, { "epoch": 0.14, "grad_norm": 7.676273130360383, "learning_rate": 1.7145e-06, "loss": 0.2988, "step": 3450 }, { "epoch": 0.14, "grad_norm": 6.006703021039653, "learning_rate": 1.7270000000000002e-06, "loss": 0.3086, "step": 3475 }, { "epoch": 0.14, "grad_norm": 8.465714704473822, "learning_rate": 1.7395e-06, "loss": 0.3052, "step": 3500 }, { "epoch": 0.14, "grad_norm": 8.170356640181524, "learning_rate": 1.7520000000000001e-06, "loss": 0.2844, "step": 3525 }, { "epoch": 0.14, "grad_norm": 7.411466808472138, "learning_rate": 1.7645e-06, "loss": 0.2994, "step": 3550 }, { "epoch": 0.14, "grad_norm": 8.624191447880747, "learning_rate": 1.777e-06, "loss": 0.3387, "step": 3575 }, { "epoch": 0.14, "grad_norm": 7.697489245961635, "learning_rate": 1.7895000000000002e-06, "loss": 0.2976, "step": 3600 }, { "epoch": 0.14, "grad_norm": 8.358874355083804, "learning_rate": 1.802e-06, "loss": 0.3221, "step": 3625 }, { "epoch": 0.14, "grad_norm": 7.701960236286209, "learning_rate": 1.8145000000000002e-06, "loss": 0.2778, "step": 3650 }, { "epoch": 0.14, "grad_norm": 7.093145317658292, "learning_rate": 1.8270000000000003e-06, "loss": 0.2809, "step": 3675 }, { "epoch": 0.15, "grad_norm": 7.5735424824748065, "learning_rate": 1.8395000000000001e-06, "loss": 0.3094, "step": 3700 }, { "epoch": 0.15, "grad_norm": 7.111126859679747, "learning_rate": 1.8520000000000002e-06, "loss": 0.2811, "step": 3725 }, { "epoch": 0.15, "grad_norm": 7.603638409237136, "learning_rate": 1.8645e-06, "loss": 0.3065, "step": 3750 }, { "epoch": 0.15, "grad_norm": 9.4063632414015, "learning_rate": 1.8770000000000002e-06, "loss": 0.3027, "step": 3775 }, { "epoch": 0.15, "grad_norm": 7.494637512263619, "learning_rate": 1.8895000000000003e-06, "loss": 0.3136, "step": 3800 }, { "epoch": 0.15, "grad_norm": 7.048441681493421, "learning_rate": 1.9020000000000002e-06, "loss": 0.3337, "step": 3825 }, { "epoch": 0.15, "grad_norm": 4.438034199516792, "learning_rate": 1.9145e-06, "loss": 0.269, "step": 3850 }, { "epoch": 0.15, "grad_norm": 7.358960525686304, "learning_rate": 1.9270000000000004e-06, "loss": 0.3209, "step": 3875 }, { "epoch": 0.15, "grad_norm": 6.925504929075971, "learning_rate": 1.9395000000000002e-06, "loss": 0.2988, "step": 3900 }, { "epoch": 0.15, "grad_norm": 6.739147879633613, "learning_rate": 1.952e-06, "loss": 0.2722, "step": 3925 }, { "epoch": 0.16, "grad_norm": 4.776015161103489, "learning_rate": 1.9645000000000004e-06, "loss": 0.2798, "step": 3950 }, { "epoch": 0.16, "grad_norm": 8.056155062567179, "learning_rate": 1.977e-06, "loss": 0.3053, "step": 3975 }, { "epoch": 0.16, "grad_norm": 6.121259851550022, "learning_rate": 1.9895e-06, "loss": 0.2838, "step": 4000 }, { "epoch": 0.16, "grad_norm": 6.30300122513391, "learning_rate": 2.002e-06, "loss": 0.3092, "step": 4025 }, { "epoch": 0.16, "grad_norm": 7.035093216193755, "learning_rate": 2.0145e-06, "loss": 0.2989, "step": 4050 }, { "epoch": 0.16, "grad_norm": 7.312870093933501, "learning_rate": 2.0270000000000002e-06, "loss": 0.2909, "step": 4075 }, { "epoch": 0.16, "grad_norm": 7.1161318992697105, "learning_rate": 2.0395e-06, "loss": 0.3155, "step": 4100 }, { "epoch": 0.16, "grad_norm": 8.985023001843178, "learning_rate": 2.052e-06, "loss": 0.2811, "step": 4125 }, { "epoch": 0.16, "grad_norm": 8.490009552023306, "learning_rate": 2.0645000000000003e-06, "loss": 0.2943, "step": 4150 }, { "epoch": 0.16, "grad_norm": 6.036226568442008, "learning_rate": 2.077e-06, "loss": 0.3051, "step": 4175 }, { "epoch": 0.17, "grad_norm": 7.160882683593939, "learning_rate": 2.0895e-06, "loss": 0.3128, "step": 4200 }, { "epoch": 0.17, "grad_norm": 6.899026627729313, "learning_rate": 2.102e-06, "loss": 0.3007, "step": 4225 }, { "epoch": 0.17, "grad_norm": 6.667556679571071, "learning_rate": 2.1145000000000003e-06, "loss": 0.294, "step": 4250 }, { "epoch": 0.17, "grad_norm": 8.45891735416569, "learning_rate": 2.127e-06, "loss": 0.3072, "step": 4275 }, { "epoch": 0.17, "grad_norm": 7.247465785827324, "learning_rate": 2.1395e-06, "loss": 0.2987, "step": 4300 }, { "epoch": 0.17, "grad_norm": 6.925041488918864, "learning_rate": 2.1520000000000003e-06, "loss": 0.3038, "step": 4325 }, { "epoch": 0.17, "grad_norm": 7.697636253311961, "learning_rate": 2.1645e-06, "loss": 0.2896, "step": 4350 }, { "epoch": 0.17, "grad_norm": 5.671845123322663, "learning_rate": 2.177e-06, "loss": 0.3013, "step": 4375 }, { "epoch": 0.17, "grad_norm": 14.42965685715927, "learning_rate": 2.1895000000000004e-06, "loss": 0.2935, "step": 4400 }, { "epoch": 0.17, "grad_norm": 7.619313005480762, "learning_rate": 2.2020000000000003e-06, "loss": 0.2822, "step": 4425 }, { "epoch": 0.18, "grad_norm": 5.96836481945949, "learning_rate": 2.2145e-06, "loss": 0.3013, "step": 4450 }, { "epoch": 0.18, "grad_norm": 8.013522956861229, "learning_rate": 2.2270000000000004e-06, "loss": 0.2998, "step": 4475 }, { "epoch": 0.18, "grad_norm": 6.564283110102572, "learning_rate": 2.2395000000000003e-06, "loss": 0.3069, "step": 4500 }, { "epoch": 0.18, "grad_norm": 7.412696409963094, "learning_rate": 2.252e-06, "loss": 0.2911, "step": 4525 }, { "epoch": 0.18, "grad_norm": 6.532716257217559, "learning_rate": 2.2645000000000005e-06, "loss": 0.293, "step": 4550 }, { "epoch": 0.18, "grad_norm": 6.44868242354979, "learning_rate": 2.2770000000000004e-06, "loss": 0.2785, "step": 4575 }, { "epoch": 0.18, "grad_norm": 6.10635902763617, "learning_rate": 2.2895e-06, "loss": 0.2966, "step": 4600 }, { "epoch": 0.18, "grad_norm": 8.249805780154833, "learning_rate": 2.302e-06, "loss": 0.3102, "step": 4625 }, { "epoch": 0.18, "grad_norm": 8.204571957255101, "learning_rate": 2.3145e-06, "loss": 0.309, "step": 4650 }, { "epoch": 0.18, "grad_norm": 7.803854804655206, "learning_rate": 2.327e-06, "loss": 0.2814, "step": 4675 }, { "epoch": 0.18, "grad_norm": 6.440776762453003, "learning_rate": 2.3395000000000002e-06, "loss": 0.3056, "step": 4700 }, { "epoch": 0.19, "grad_norm": 8.15879339209754, "learning_rate": 2.352e-06, "loss": 0.2964, "step": 4725 }, { "epoch": 0.19, "grad_norm": 5.891657048350974, "learning_rate": 2.3645e-06, "loss": 0.2815, "step": 4750 }, { "epoch": 0.19, "grad_norm": 7.240599749344253, "learning_rate": 2.3770000000000003e-06, "loss": 0.2854, "step": 4775 }, { "epoch": 0.19, "grad_norm": 5.975988999533506, "learning_rate": 2.3895e-06, "loss": 0.2888, "step": 4800 }, { "epoch": 0.19, "grad_norm": 8.013535222990837, "learning_rate": 2.402e-06, "loss": 0.2954, "step": 4825 }, { "epoch": 0.19, "grad_norm": 11.029743044779613, "learning_rate": 2.4145000000000003e-06, "loss": 0.2884, "step": 4850 }, { "epoch": 0.19, "grad_norm": 6.956958714016763, "learning_rate": 2.4270000000000002e-06, "loss": 0.2969, "step": 4875 }, { "epoch": 0.19, "grad_norm": 7.823749748330595, "learning_rate": 2.4395e-06, "loss": 0.3081, "step": 4900 }, { "epoch": 0.19, "grad_norm": 6.2241357450122425, "learning_rate": 2.4520000000000004e-06, "loss": 0.2992, "step": 4925 }, { "epoch": 0.19, "grad_norm": 9.16911141354443, "learning_rate": 2.4645000000000003e-06, "loss": 0.3245, "step": 4950 }, { "epoch": 0.2, "grad_norm": 6.060217712197034, "learning_rate": 2.477e-06, "loss": 0.3118, "step": 4975 }, { "epoch": 0.2, "grad_norm": 7.425673795449125, "learning_rate": 2.4895e-06, "loss": 0.2662, "step": 5000 }, { "epoch": 0.2, "grad_norm": 6.595958329136231, "learning_rate": 2.502e-06, "loss": 0.3158, "step": 5025 }, { "epoch": 0.2, "grad_norm": 3.6550675982874363, "learning_rate": 2.5140000000000004e-06, "loss": 0.28, "step": 5050 }, { "epoch": 0.2, "grad_norm": 7.79782451562284, "learning_rate": 2.5265e-06, "loss": 0.2879, "step": 5075 }, { "epoch": 0.2, "grad_norm": 8.451642556039346, "learning_rate": 2.539e-06, "loss": 0.305, "step": 5100 }, { "epoch": 0.2, "grad_norm": 6.916829339590843, "learning_rate": 2.5515e-06, "loss": 0.271, "step": 5125 }, { "epoch": 0.2, "grad_norm": 6.245252699817357, "learning_rate": 2.5640000000000004e-06, "loss": 0.2763, "step": 5150 }, { "epoch": 0.2, "grad_norm": 7.212402917259702, "learning_rate": 2.5765000000000002e-06, "loss": 0.2763, "step": 5175 }, { "epoch": 0.2, "grad_norm": 6.823573409166045, "learning_rate": 2.5890000000000005e-06, "loss": 0.3001, "step": 5200 }, { "epoch": 0.21, "grad_norm": 7.940683369097169, "learning_rate": 2.6015e-06, "loss": 0.3116, "step": 5225 }, { "epoch": 0.21, "grad_norm": 6.3976420123516755, "learning_rate": 2.6140000000000003e-06, "loss": 0.2606, "step": 5250 }, { "epoch": 0.21, "grad_norm": 7.791330529626596, "learning_rate": 2.6265e-06, "loss": 0.2826, "step": 5275 }, { "epoch": 0.21, "grad_norm": 8.076143804816086, "learning_rate": 2.6390000000000005e-06, "loss": 0.2833, "step": 5300 }, { "epoch": 0.21, "grad_norm": 10.190822919416679, "learning_rate": 2.6515000000000004e-06, "loss": 0.3373, "step": 5325 }, { "epoch": 0.21, "grad_norm": 6.8692012310423465, "learning_rate": 2.6640000000000007e-06, "loss": 0.3403, "step": 5350 }, { "epoch": 0.21, "grad_norm": 7.442368544772186, "learning_rate": 2.6765e-06, "loss": 0.2975, "step": 5375 }, { "epoch": 0.21, "grad_norm": 7.524857704750757, "learning_rate": 2.689e-06, "loss": 0.2758, "step": 5400 }, { "epoch": 0.21, "grad_norm": 8.21051104357922, "learning_rate": 2.7015000000000003e-06, "loss": 0.2862, "step": 5425 }, { "epoch": 0.21, "grad_norm": 7.6759388918523985, "learning_rate": 2.7139999999999998e-06, "loss": 0.2744, "step": 5450 }, { "epoch": 0.22, "grad_norm": 7.630174118449949, "learning_rate": 2.7265e-06, "loss": 0.2988, "step": 5475 }, { "epoch": 0.22, "grad_norm": 6.357716937591224, "learning_rate": 2.739e-06, "loss": 0.2827, "step": 5500 }, { "epoch": 0.22, "grad_norm": 6.779528956514759, "learning_rate": 2.7515000000000003e-06, "loss": 0.2589, "step": 5525 }, { "epoch": 0.22, "grad_norm": 9.02168399367326, "learning_rate": 2.764e-06, "loss": 0.2819, "step": 5550 }, { "epoch": 0.22, "grad_norm": 6.823990587562972, "learning_rate": 2.7765000000000004e-06, "loss": 0.3137, "step": 5575 }, { "epoch": 0.22, "grad_norm": 7.019705933767146, "learning_rate": 2.789e-06, "loss": 0.3069, "step": 5600 }, { "epoch": 0.22, "grad_norm": 7.7318630027454995, "learning_rate": 2.8015e-06, "loss": 0.2665, "step": 5625 }, { "epoch": 0.22, "grad_norm": 8.11880507789445, "learning_rate": 2.814e-06, "loss": 0.2872, "step": 5650 }, { "epoch": 0.22, "grad_norm": 5.7977850369642505, "learning_rate": 2.8265000000000004e-06, "loss": 0.283, "step": 5675 }, { "epoch": 0.22, "grad_norm": 6.341136733880947, "learning_rate": 2.8390000000000003e-06, "loss": 0.3007, "step": 5700 }, { "epoch": 0.23, "grad_norm": 5.970881329915707, "learning_rate": 2.8515000000000006e-06, "loss": 0.2784, "step": 5725 }, { "epoch": 0.23, "grad_norm": 7.729318544130669, "learning_rate": 2.864e-06, "loss": 0.3107, "step": 5750 }, { "epoch": 0.23, "grad_norm": 6.546507761712345, "learning_rate": 2.8765000000000003e-06, "loss": 0.2822, "step": 5775 }, { "epoch": 0.23, "grad_norm": 4.978696715726333, "learning_rate": 2.889e-06, "loss": 0.288, "step": 5800 }, { "epoch": 0.23, "grad_norm": 9.228311134929747, "learning_rate": 2.9015000000000005e-06, "loss": 0.2903, "step": 5825 }, { "epoch": 0.23, "grad_norm": 7.909081531042711, "learning_rate": 2.914e-06, "loss": 0.3001, "step": 5850 }, { "epoch": 0.23, "grad_norm": 9.066473702285178, "learning_rate": 2.9265000000000003e-06, "loss": 0.3, "step": 5875 }, { "epoch": 0.23, "grad_norm": 4.981022806434303, "learning_rate": 2.939e-06, "loss": 0.2623, "step": 5900 }, { "epoch": 0.23, "grad_norm": 7.451955876526818, "learning_rate": 2.9515000000000005e-06, "loss": 0.3001, "step": 5925 }, { "epoch": 0.23, "grad_norm": 5.213926487478616, "learning_rate": 2.9640000000000003e-06, "loss": 0.2856, "step": 5950 }, { "epoch": 0.24, "grad_norm": 7.590418657876296, "learning_rate": 2.9765000000000006e-06, "loss": 0.3048, "step": 5975 }, { "epoch": 0.24, "grad_norm": 6.618581560821221, "learning_rate": 2.989e-06, "loss": 0.2802, "step": 6000 }, { "epoch": 0.24, "grad_norm": 7.747177181686857, "learning_rate": 3.0015e-06, "loss": 0.3047, "step": 6025 }, { "epoch": 0.24, "grad_norm": 2.702779308707192, "learning_rate": 3.0140000000000003e-06, "loss": 0.2845, "step": 6050 }, { "epoch": 0.24, "grad_norm": 7.466401914069188, "learning_rate": 3.026e-06, "loss": 0.2889, "step": 6075 }, { "epoch": 0.24, "grad_norm": 8.750175939483835, "learning_rate": 3.0385000000000002e-06, "loss": 0.2998, "step": 6100 }, { "epoch": 0.24, "grad_norm": 6.052264883280283, "learning_rate": 3.051e-06, "loss": 0.3153, "step": 6125 }, { "epoch": 0.24, "grad_norm": 6.1350193715664165, "learning_rate": 3.0635000000000004e-06, "loss": 0.2599, "step": 6150 }, { "epoch": 0.24, "grad_norm": 4.948910715297065, "learning_rate": 3.0760000000000003e-06, "loss": 0.2711, "step": 6175 }, { "epoch": 0.24, "grad_norm": 6.252587211408633, "learning_rate": 3.0885000000000006e-06, "loss": 0.295, "step": 6200 }, { "epoch": 0.24, "grad_norm": 6.165520107137274, "learning_rate": 3.101e-06, "loss": 0.2763, "step": 6225 }, { "epoch": 0.25, "grad_norm": 7.4996014769823915, "learning_rate": 3.1135000000000003e-06, "loss": 0.2849, "step": 6250 }, { "epoch": 0.25, "grad_norm": 6.964362624237183, "learning_rate": 3.1260000000000002e-06, "loss": 0.3028, "step": 6275 }, { "epoch": 0.25, "grad_norm": 7.169773589103691, "learning_rate": 3.1385000000000005e-06, "loss": 0.278, "step": 6300 }, { "epoch": 0.25, "grad_norm": 8.406835628812196, "learning_rate": 3.151e-06, "loss": 0.2558, "step": 6325 }, { "epoch": 0.25, "grad_norm": 8.084464446709841, "learning_rate": 3.1635000000000003e-06, "loss": 0.284, "step": 6350 }, { "epoch": 0.25, "grad_norm": 6.694245128142803, "learning_rate": 3.176e-06, "loss": 0.2611, "step": 6375 }, { "epoch": 0.25, "grad_norm": 6.564775176663976, "learning_rate": 3.1885000000000005e-06, "loss": 0.277, "step": 6400 }, { "epoch": 0.25, "grad_norm": 7.356766955805626, "learning_rate": 3.2010000000000004e-06, "loss": 0.2585, "step": 6425 }, { "epoch": 0.25, "grad_norm": 6.649934835176111, "learning_rate": 3.2135000000000007e-06, "loss": 0.2556, "step": 6450 }, { "epoch": 0.25, "grad_norm": 6.635433701155168, "learning_rate": 3.226e-06, "loss": 0.291, "step": 6475 }, { "epoch": 0.26, "grad_norm": 6.724811843119688, "learning_rate": 3.2385000000000004e-06, "loss": 0.2774, "step": 6500 }, { "epoch": 0.26, "grad_norm": 8.629267878070488, "learning_rate": 3.2510000000000003e-06, "loss": 0.288, "step": 6525 }, { "epoch": 0.26, "grad_norm": 7.037964444193315, "learning_rate": 3.2635e-06, "loss": 0.2723, "step": 6550 }, { "epoch": 0.26, "grad_norm": 7.182370121813334, "learning_rate": 3.2760000000000005e-06, "loss": 0.3106, "step": 6575 }, { "epoch": 0.26, "grad_norm": 7.1528459015462245, "learning_rate": 3.2885e-06, "loss": 0.2692, "step": 6600 }, { "epoch": 0.26, "grad_norm": 6.500105672722975, "learning_rate": 3.3010000000000002e-06, "loss": 0.3031, "step": 6625 }, { "epoch": 0.26, "grad_norm": 5.834985512213181, "learning_rate": 3.3135e-06, "loss": 0.2664, "step": 6650 }, { "epoch": 0.26, "grad_norm": 8.19309701510752, "learning_rate": 3.3260000000000004e-06, "loss": 0.2723, "step": 6675 }, { "epoch": 0.26, "grad_norm": 8.139308511625616, "learning_rate": 3.3385e-06, "loss": 0.2839, "step": 6700 }, { "epoch": 0.26, "grad_norm": 7.3241076676376435, "learning_rate": 3.351e-06, "loss": 0.2806, "step": 6725 }, { "epoch": 0.27, "grad_norm": 6.98434229153364, "learning_rate": 3.3635e-06, "loss": 0.28, "step": 6750 }, { "epoch": 0.27, "grad_norm": 7.129264792301614, "learning_rate": 3.3760000000000004e-06, "loss": 0.282, "step": 6775 }, { "epoch": 0.27, "grad_norm": 8.170862565343718, "learning_rate": 3.3885000000000003e-06, "loss": 0.2858, "step": 6800 }, { "epoch": 0.27, "grad_norm": 7.838114805347313, "learning_rate": 3.4010000000000006e-06, "loss": 0.2821, "step": 6825 }, { "epoch": 0.27, "grad_norm": 7.030750778829712, "learning_rate": 3.4135e-06, "loss": 0.2925, "step": 6850 }, { "epoch": 0.27, "grad_norm": 7.186794776187053, "learning_rate": 3.4260000000000003e-06, "loss": 0.2813, "step": 6875 }, { "epoch": 0.27, "grad_norm": 6.6235944995301645, "learning_rate": 3.4385e-06, "loss": 0.2884, "step": 6900 }, { "epoch": 0.27, "grad_norm": 6.471229594532035, "learning_rate": 3.4510000000000005e-06, "loss": 0.2759, "step": 6925 }, { "epoch": 0.27, "grad_norm": 7.0819247117845, "learning_rate": 3.4635000000000004e-06, "loss": 0.2738, "step": 6950 }, { "epoch": 0.27, "grad_norm": 5.998542491050096, "learning_rate": 3.4760000000000007e-06, "loss": 0.2878, "step": 6975 }, { "epoch": 0.28, "grad_norm": 6.461630412552012, "learning_rate": 3.4885e-06, "loss": 0.3053, "step": 7000 }, { "epoch": 0.28, "grad_norm": 6.586574763749033, "learning_rate": 3.5010000000000004e-06, "loss": 0.2919, "step": 7025 }, { "epoch": 0.28, "grad_norm": 7.792709877852477, "learning_rate": 3.5135000000000003e-06, "loss": 0.2641, "step": 7050 }, { "epoch": 0.28, "grad_norm": 6.911279814176992, "learning_rate": 3.5255e-06, "loss": 0.2902, "step": 7075 }, { "epoch": 0.28, "grad_norm": 6.067894245771899, "learning_rate": 3.5380000000000003e-06, "loss": 0.2668, "step": 7100 }, { "epoch": 0.28, "grad_norm": 7.1730720185264785, "learning_rate": 3.5505e-06, "loss": 0.2602, "step": 7125 }, { "epoch": 0.28, "grad_norm": 6.9493924947782295, "learning_rate": 3.5630000000000004e-06, "loss": 0.2878, "step": 7150 }, { "epoch": 0.28, "grad_norm": 5.022243763558953, "learning_rate": 3.5755e-06, "loss": 0.2792, "step": 7175 }, { "epoch": 0.28, "grad_norm": 7.59839736126787, "learning_rate": 3.588e-06, "loss": 0.2716, "step": 7200 }, { "epoch": 0.28, "grad_norm": 8.033887406465633, "learning_rate": 3.6005e-06, "loss": 0.3156, "step": 7225 }, { "epoch": 0.29, "grad_norm": 6.642200365588752, "learning_rate": 3.6130000000000004e-06, "loss": 0.2713, "step": 7250 }, { "epoch": 0.29, "grad_norm": 6.796078080375151, "learning_rate": 3.6255000000000003e-06, "loss": 0.2759, "step": 7275 }, { "epoch": 0.29, "grad_norm": 6.564454612601875, "learning_rate": 3.6380000000000006e-06, "loss": 0.2598, "step": 7300 }, { "epoch": 0.29, "grad_norm": 6.505268037249782, "learning_rate": 3.6505e-06, "loss": 0.2723, "step": 7325 }, { "epoch": 0.29, "grad_norm": 6.1652467945628, "learning_rate": 3.6630000000000003e-06, "loss": 0.2671, "step": 7350 }, { "epoch": 0.29, "grad_norm": 6.08578394589941, "learning_rate": 3.6755000000000002e-06, "loss": 0.2741, "step": 7375 }, { "epoch": 0.29, "grad_norm": 8.409058891602932, "learning_rate": 3.6880000000000005e-06, "loss": 0.2785, "step": 7400 }, { "epoch": 0.29, "grad_norm": 6.599948826653968, "learning_rate": 3.7005000000000004e-06, "loss": 0.2881, "step": 7425 }, { "epoch": 0.29, "grad_norm": 7.3632193157404755, "learning_rate": 3.7130000000000007e-06, "loss": 0.2754, "step": 7450 }, { "epoch": 0.29, "grad_norm": 4.580890830497889, "learning_rate": 3.7255e-06, "loss": 0.2763, "step": 7475 }, { "epoch": 0.3, "grad_norm": 7.429129214199958, "learning_rate": 3.7380000000000005e-06, "loss": 0.27, "step": 7500 }, { "epoch": 0.3, "grad_norm": 6.94826201614837, "learning_rate": 3.7505000000000003e-06, "loss": 0.2718, "step": 7525 }, { "epoch": 0.3, "grad_norm": 7.004602730126053, "learning_rate": 3.7630000000000006e-06, "loss": 0.2996, "step": 7550 }, { "epoch": 0.3, "grad_norm": 6.70321711644467, "learning_rate": 3.7755e-06, "loss": 0.2533, "step": 7575 }, { "epoch": 0.3, "grad_norm": 5.926960517500858, "learning_rate": 3.7880000000000004e-06, "loss": 0.266, "step": 7600 }, { "epoch": 0.3, "grad_norm": 7.099979599842278, "learning_rate": 3.8005000000000003e-06, "loss": 0.278, "step": 7625 }, { "epoch": 0.3, "grad_norm": 8.394314379220583, "learning_rate": 3.813e-06, "loss": 0.2794, "step": 7650 }, { "epoch": 0.3, "grad_norm": 7.984983861009723, "learning_rate": 3.8255e-06, "loss": 0.2889, "step": 7675 }, { "epoch": 0.3, "grad_norm": 6.498279020164819, "learning_rate": 3.838e-06, "loss": 0.2793, "step": 7700 }, { "epoch": 0.3, "grad_norm": 6.672802599011468, "learning_rate": 3.850500000000001e-06, "loss": 0.2954, "step": 7725 }, { "epoch": 0.3, "grad_norm": 7.429422338538009, "learning_rate": 3.863e-06, "loss": 0.3035, "step": 7750 }, { "epoch": 0.31, "grad_norm": 6.278266658380528, "learning_rate": 3.8755e-06, "loss": 0.2887, "step": 7775 }, { "epoch": 0.31, "grad_norm": 5.933838932503291, "learning_rate": 3.888e-06, "loss": 0.27, "step": 7800 }, { "epoch": 0.31, "grad_norm": 5.341557859998104, "learning_rate": 3.9005e-06, "loss": 0.2874, "step": 7825 }, { "epoch": 0.31, "grad_norm": 6.368800106240496, "learning_rate": 3.9130000000000005e-06, "loss": 0.2697, "step": 7850 }, { "epoch": 0.31, "grad_norm": 8.531746542220086, "learning_rate": 3.925500000000001e-06, "loss": 0.2704, "step": 7875 }, { "epoch": 0.31, "grad_norm": 5.948769494377511, "learning_rate": 3.938e-06, "loss": 0.266, "step": 7900 }, { "epoch": 0.31, "grad_norm": 7.0940656271040545, "learning_rate": 3.9505000000000005e-06, "loss": 0.2859, "step": 7925 }, { "epoch": 0.31, "grad_norm": 5.508339585723285, "learning_rate": 3.963e-06, "loss": 0.2851, "step": 7950 }, { "epoch": 0.31, "grad_norm": 6.14929249851529, "learning_rate": 3.9755e-06, "loss": 0.2823, "step": 7975 }, { "epoch": 0.31, "grad_norm": 5.3779482123770475, "learning_rate": 3.988000000000001e-06, "loss": 0.2698, "step": 8000 }, { "epoch": 0.32, "grad_norm": 6.074063079478101, "learning_rate": 4.000500000000001e-06, "loss": 0.2819, "step": 8025 }, { "epoch": 0.32, "grad_norm": 6.362443834444672, "learning_rate": 4.013e-06, "loss": 0.2623, "step": 8050 }, { "epoch": 0.32, "grad_norm": 6.51778892694598, "learning_rate": 4.0250000000000004e-06, "loss": 0.2563, "step": 8075 }, { "epoch": 0.32, "grad_norm": 5.3055285454811845, "learning_rate": 4.037500000000001e-06, "loss": 0.2698, "step": 8100 }, { "epoch": 0.32, "grad_norm": 5.9267504904096375, "learning_rate": 4.05e-06, "loss": 0.2522, "step": 8125 }, { "epoch": 0.32, "grad_norm": 7.966682070450956, "learning_rate": 4.0625000000000005e-06, "loss": 0.2976, "step": 8150 }, { "epoch": 0.32, "grad_norm": 6.548744530829551, "learning_rate": 4.075e-06, "loss": 0.2904, "step": 8175 }, { "epoch": 0.32, "grad_norm": 6.753040626050456, "learning_rate": 4.0875e-06, "loss": 0.2843, "step": 8200 }, { "epoch": 0.32, "grad_norm": 7.200730763133159, "learning_rate": 4.1e-06, "loss": 0.2648, "step": 8225 }, { "epoch": 0.32, "grad_norm": 6.411993308970435, "learning_rate": 4.1125e-06, "loss": 0.2949, "step": 8250 }, { "epoch": 0.33, "grad_norm": 6.226554884823071, "learning_rate": 4.125e-06, "loss": 0.2606, "step": 8275 }, { "epoch": 0.33, "grad_norm": 6.5307361977359895, "learning_rate": 4.137500000000001e-06, "loss": 0.275, "step": 8300 }, { "epoch": 0.33, "grad_norm": 12.482533214728566, "learning_rate": 4.15e-06, "loss": 0.2997, "step": 8325 }, { "epoch": 0.33, "grad_norm": 6.674706679093602, "learning_rate": 4.1625e-06, "loss": 0.2955, "step": 8350 }, { "epoch": 0.33, "grad_norm": 6.992206297283636, "learning_rate": 4.175e-06, "loss": 0.2743, "step": 8375 }, { "epoch": 0.33, "grad_norm": 6.042025737938246, "learning_rate": 4.1875e-06, "loss": 0.2671, "step": 8400 }, { "epoch": 0.33, "grad_norm": 6.055250933615206, "learning_rate": 4.2000000000000004e-06, "loss": 0.2577, "step": 8425 }, { "epoch": 0.33, "grad_norm": 5.836279970090582, "learning_rate": 4.212500000000001e-06, "loss": 0.2959, "step": 8450 }, { "epoch": 0.33, "grad_norm": 6.347072802266105, "learning_rate": 4.225e-06, "loss": 0.3041, "step": 8475 }, { "epoch": 0.33, "grad_norm": 6.126442118336176, "learning_rate": 4.2375000000000005e-06, "loss": 0.2882, "step": 8500 }, { "epoch": 0.34, "grad_norm": 6.18958310982247, "learning_rate": 4.25e-06, "loss": 0.3015, "step": 8525 }, { "epoch": 0.34, "grad_norm": 6.197927352860307, "learning_rate": 4.2625e-06, "loss": 0.2791, "step": 8550 }, { "epoch": 0.34, "grad_norm": 7.522494327067332, "learning_rate": 4.2750000000000006e-06, "loss": 0.2736, "step": 8575 }, { "epoch": 0.34, "grad_norm": 6.237403417855199, "learning_rate": 4.287500000000001e-06, "loss": 0.2924, "step": 8600 }, { "epoch": 0.34, "grad_norm": 5.3182172892190565, "learning_rate": 4.3e-06, "loss": 0.2824, "step": 8625 }, { "epoch": 0.34, "grad_norm": 9.816379144060154, "learning_rate": 4.312500000000001e-06, "loss": 0.2843, "step": 8650 }, { "epoch": 0.34, "grad_norm": 4.895548452165421, "learning_rate": 4.325e-06, "loss": 0.284, "step": 8675 }, { "epoch": 0.34, "grad_norm": 6.654941433869707, "learning_rate": 4.3375e-06, "loss": 0.2705, "step": 8700 }, { "epoch": 0.34, "grad_norm": 10.469132180677638, "learning_rate": 4.350000000000001e-06, "loss": 0.2729, "step": 8725 }, { "epoch": 0.34, "grad_norm": 6.44234959433828, "learning_rate": 4.362500000000001e-06, "loss": 0.2644, "step": 8750 }, { "epoch": 0.35, "grad_norm": 5.838324842695495, "learning_rate": 4.3750000000000005e-06, "loss": 0.2676, "step": 8775 }, { "epoch": 0.35, "grad_norm": 9.085540132092943, "learning_rate": 4.3875e-06, "loss": 0.2869, "step": 8800 }, { "epoch": 0.35, "grad_norm": 6.795754152011421, "learning_rate": 4.4e-06, "loss": 0.276, "step": 8825 }, { "epoch": 0.35, "grad_norm": 7.102982969922736, "learning_rate": 4.4125000000000005e-06, "loss": 0.2595, "step": 8850 }, { "epoch": 0.35, "grad_norm": 7.700990648549398, "learning_rate": 4.425e-06, "loss": 0.2812, "step": 8875 }, { "epoch": 0.35, "grad_norm": 4.312933654551669, "learning_rate": 4.4375e-06, "loss": 0.2852, "step": 8900 }, { "epoch": 0.35, "grad_norm": 8.379757647366656, "learning_rate": 4.450000000000001e-06, "loss": 0.2779, "step": 8925 }, { "epoch": 0.35, "grad_norm": 6.792550628543418, "learning_rate": 4.4625e-06, "loss": 0.2793, "step": 8950 }, { "epoch": 0.35, "grad_norm": 5.945063099925784, "learning_rate": 4.475e-06, "loss": 0.291, "step": 8975 }, { "epoch": 0.35, "grad_norm": 6.365832685836148, "learning_rate": 4.4875e-06, "loss": 0.275, "step": 9000 }, { "epoch": 0.36, "grad_norm": 6.188639752527868, "learning_rate": 4.5e-06, "loss": 0.3043, "step": 9025 }, { "epoch": 0.36, "grad_norm": 5.383012165041406, "learning_rate": 4.5125e-06, "loss": 0.2791, "step": 9050 }, { "epoch": 0.36, "grad_norm": 5.5648061171749745, "learning_rate": 4.5245000000000005e-06, "loss": 0.2812, "step": 9075 }, { "epoch": 0.36, "grad_norm": 6.718625044735663, "learning_rate": 4.537e-06, "loss": 0.2817, "step": 9100 }, { "epoch": 0.36, "grad_norm": 7.578986129922235, "learning_rate": 4.5495e-06, "loss": 0.3059, "step": 9125 }, { "epoch": 0.36, "grad_norm": 4.950562990896707, "learning_rate": 4.5620000000000005e-06, "loss": 0.2991, "step": 9150 }, { "epoch": 0.36, "grad_norm": 6.6274318125177105, "learning_rate": 4.574500000000001e-06, "loss": 0.2905, "step": 9175 }, { "epoch": 0.36, "grad_norm": 7.868725872074689, "learning_rate": 4.587e-06, "loss": 0.3046, "step": 9200 }, { "epoch": 0.36, "grad_norm": 7.514705728312666, "learning_rate": 4.599500000000001e-06, "loss": 0.2778, "step": 9225 }, { "epoch": 0.36, "grad_norm": 6.37394309344846, "learning_rate": 4.612e-06, "loss": 0.2796, "step": 9250 }, { "epoch": 0.36, "grad_norm": 6.710149784112031, "learning_rate": 4.6245e-06, "loss": 0.2921, "step": 9275 }, { "epoch": 0.37, "grad_norm": 7.545639520621682, "learning_rate": 4.637000000000001e-06, "loss": 0.268, "step": 9300 }, { "epoch": 0.37, "grad_norm": 5.688131287423445, "learning_rate": 4.6495e-06, "loss": 0.2524, "step": 9325 }, { "epoch": 0.37, "grad_norm": 6.135284919662475, "learning_rate": 4.6620000000000004e-06, "loss": 0.276, "step": 9350 }, { "epoch": 0.37, "grad_norm": 6.688180517230278, "learning_rate": 4.6745e-06, "loss": 0.2862, "step": 9375 }, { "epoch": 0.37, "grad_norm": 6.795335976277467, "learning_rate": 4.687e-06, "loss": 0.2888, "step": 9400 }, { "epoch": 0.37, "grad_norm": 5.746370462450612, "learning_rate": 4.6995000000000005e-06, "loss": 0.2662, "step": 9425 }, { "epoch": 0.37, "grad_norm": 6.769522511707671, "learning_rate": 4.712000000000001e-06, "loss": 0.2984, "step": 9450 }, { "epoch": 0.37, "grad_norm": 5.346844756223433, "learning_rate": 4.7245e-06, "loss": 0.2596, "step": 9475 }, { "epoch": 0.37, "grad_norm": 7.3781982999908795, "learning_rate": 4.7370000000000006e-06, "loss": 0.2792, "step": 9500 }, { "epoch": 0.37, "grad_norm": 6.069856946236931, "learning_rate": 4.7495e-06, "loss": 0.2762, "step": 9525 }, { "epoch": 0.38, "grad_norm": 5.564725582025151, "learning_rate": 4.762e-06, "loss": 0.2868, "step": 9550 }, { "epoch": 0.38, "grad_norm": 4.831607241976059, "learning_rate": 4.774500000000001e-06, "loss": 0.2633, "step": 9575 }, { "epoch": 0.38, "grad_norm": 5.993570224205531, "learning_rate": 4.787000000000001e-06, "loss": 0.2907, "step": 9600 }, { "epoch": 0.38, "grad_norm": 5.473582625195408, "learning_rate": 4.7995e-06, "loss": 0.2939, "step": 9625 }, { "epoch": 0.38, "grad_norm": 4.8096853747215675, "learning_rate": 4.812000000000001e-06, "loss": 0.2694, "step": 9650 }, { "epoch": 0.38, "grad_norm": 7.871920620924059, "learning_rate": 4.8245e-06, "loss": 0.2796, "step": 9675 }, { "epoch": 0.38, "grad_norm": 5.914335893785346, "learning_rate": 4.8370000000000004e-06, "loss": 0.2843, "step": 9700 }, { "epoch": 0.38, "grad_norm": 5.459462930577671, "learning_rate": 4.8495e-06, "loss": 0.273, "step": 9725 }, { "epoch": 0.38, "grad_norm": 7.001699192626203, "learning_rate": 4.862e-06, "loss": 0.2588, "step": 9750 }, { "epoch": 0.38, "grad_norm": 6.407971853644161, "learning_rate": 4.8745000000000005e-06, "loss": 0.2705, "step": 9775 }, { "epoch": 0.39, "grad_norm": 5.359611800340205, "learning_rate": 4.887000000000001e-06, "loss": 0.2818, "step": 9800 }, { "epoch": 0.39, "grad_norm": 5.385058355738006, "learning_rate": 4.8995e-06, "loss": 0.2821, "step": 9825 }, { "epoch": 0.39, "grad_norm": 5.8879065529077135, "learning_rate": 4.9120000000000006e-06, "loss": 0.2879, "step": 9850 }, { "epoch": 0.39, "grad_norm": 5.521614128029462, "learning_rate": 4.9245e-06, "loss": 0.2701, "step": 9875 }, { "epoch": 0.39, "grad_norm": 6.22001469157038, "learning_rate": 4.937e-06, "loss": 0.2814, "step": 9900 }, { "epoch": 0.39, "grad_norm": 5.645954802312944, "learning_rate": 4.949500000000001e-06, "loss": 0.2788, "step": 9925 }, { "epoch": 0.39, "grad_norm": 5.975705959262324, "learning_rate": 4.962e-06, "loss": 0.2745, "step": 9950 }, { "epoch": 0.39, "grad_norm": 6.7617079563479345, "learning_rate": 4.9745e-06, "loss": 0.2882, "step": 9975 }, { "epoch": 0.39, "grad_norm": 6.349127933186041, "learning_rate": 4.987e-06, "loss": 0.2717, "step": 10000 }, { "epoch": 0.39, "eval_loss": 0.414306640625, "eval_runtime": 11566.661, "eval_samples_per_second": 0.818, "eval_steps_per_second": 0.051, "eval_wer": 0.1341312356224764, "step": 10000 }, { "epoch": 0.39, "grad_norm": 7.011108784934462, "learning_rate": 4.9995e-06, "loss": 0.2786, "step": 10025 }, { "epoch": 0.4, "grad_norm": 6.8292619191203014, "learning_rate": 4.999959866220736e-06, "loss": 0.2727, "step": 10050 }, { "epoch": 0.4, "grad_norm": 8.723216450366968, "learning_rate": 4.999919732441472e-06, "loss": 0.2894, "step": 10075 }, { "epoch": 0.4, "grad_norm": 7.24048520411389, "learning_rate": 4.999877926421406e-06, "loss": 0.2752, "step": 10100 }, { "epoch": 0.4, "grad_norm": 6.342763382928159, "learning_rate": 4.999836120401338e-06, "loss": 0.2667, "step": 10125 }, { "epoch": 0.4, "grad_norm": 7.50752715283013, "learning_rate": 4.999794314381271e-06, "loss": 0.2939, "step": 10150 }, { "epoch": 0.4, "grad_norm": 6.204906569121375, "learning_rate": 4.9997525083612046e-06, "loss": 0.2718, "step": 10175 }, { "epoch": 0.4, "grad_norm": 4.752575646886806, "learning_rate": 4.999710702341137e-06, "loss": 0.2747, "step": 10200 }, { "epoch": 0.4, "grad_norm": 5.796030259684187, "learning_rate": 4.999668896321071e-06, "loss": 0.2902, "step": 10225 }, { "epoch": 0.4, "grad_norm": 6.053195409587687, "learning_rate": 4.9996270903010035e-06, "loss": 0.3108, "step": 10250 }, { "epoch": 0.4, "grad_norm": 6.059127631079693, "learning_rate": 4.999585284280937e-06, "loss": 0.2775, "step": 10275 }, { "epoch": 0.41, "grad_norm": 6.383155938148739, "learning_rate": 4.99954347826087e-06, "loss": 0.3066, "step": 10300 }, { "epoch": 0.41, "grad_norm": 6.561402837665798, "learning_rate": 4.999501672240803e-06, "loss": 0.2798, "step": 10325 }, { "epoch": 0.41, "grad_norm": 7.225986990831043, "learning_rate": 4.999459866220736e-06, "loss": 0.2957, "step": 10350 }, { "epoch": 0.41, "grad_norm": 5.381995878211679, "learning_rate": 4.9994180602006695e-06, "loss": 0.2824, "step": 10375 }, { "epoch": 0.41, "grad_norm": 6.422499531575439, "learning_rate": 4.999376254180602e-06, "loss": 0.2682, "step": 10400 }, { "epoch": 0.41, "grad_norm": 6.429046461471528, "learning_rate": 4.999334448160536e-06, "loss": 0.2909, "step": 10425 }, { "epoch": 0.41, "grad_norm": 5.823072159394408, "learning_rate": 4.9992926421404685e-06, "loss": 0.2588, "step": 10450 }, { "epoch": 0.41, "grad_norm": 6.64555406872812, "learning_rate": 4.999250836120402e-06, "loss": 0.2675, "step": 10475 }, { "epoch": 0.41, "grad_norm": 7.285451019452604, "learning_rate": 4.999209030100335e-06, "loss": 0.2743, "step": 10500 }, { "epoch": 0.41, "grad_norm": 5.804120728206725, "learning_rate": 4.999167224080268e-06, "loss": 0.288, "step": 10525 }, { "epoch": 0.42, "grad_norm": 5.098440922169357, "learning_rate": 4.999125418060201e-06, "loss": 0.272, "step": 10550 }, { "epoch": 0.42, "grad_norm": 6.546611829047939, "learning_rate": 4.9990836120401345e-06, "loss": 0.2883, "step": 10575 }, { "epoch": 0.42, "grad_norm": 5.622357797847333, "learning_rate": 4.999041806020067e-06, "loss": 0.2577, "step": 10600 }, { "epoch": 0.42, "grad_norm": 6.001323909432522, "learning_rate": 4.999000000000001e-06, "loss": 0.2925, "step": 10625 }, { "epoch": 0.42, "grad_norm": 6.211276512696046, "learning_rate": 4.9989581939799335e-06, "loss": 0.2711, "step": 10650 }, { "epoch": 0.42, "grad_norm": 6.565052395271628, "learning_rate": 4.998916387959867e-06, "loss": 0.2988, "step": 10675 }, { "epoch": 0.42, "grad_norm": 6.296027910196759, "learning_rate": 4.9988745819398e-06, "loss": 0.2773, "step": 10700 }, { "epoch": 0.42, "grad_norm": 6.344517029275454, "learning_rate": 4.998832775919733e-06, "loss": 0.257, "step": 10725 }, { "epoch": 0.42, "grad_norm": 6.746065840605949, "learning_rate": 4.998790969899666e-06, "loss": 0.2832, "step": 10750 }, { "epoch": 0.42, "grad_norm": 6.779174959859481, "learning_rate": 4.9987491638795995e-06, "loss": 0.281, "step": 10775 }, { "epoch": 0.42, "grad_norm": 6.109047891868748, "learning_rate": 4.998707357859532e-06, "loss": 0.2972, "step": 10800 }, { "epoch": 0.43, "grad_norm": 5.426495294412173, "learning_rate": 4.998665551839466e-06, "loss": 0.2689, "step": 10825 }, { "epoch": 0.43, "grad_norm": 8.24421870740098, "learning_rate": 4.9986237458193984e-06, "loss": 0.2659, "step": 10850 }, { "epoch": 0.43, "grad_norm": 7.081586642876751, "learning_rate": 4.998581939799331e-06, "loss": 0.2672, "step": 10875 }, { "epoch": 0.43, "grad_norm": 5.126266886484015, "learning_rate": 4.998540133779265e-06, "loss": 0.2675, "step": 10900 }, { "epoch": 0.43, "grad_norm": 5.726554912145362, "learning_rate": 4.998498327759197e-06, "loss": 0.2881, "step": 10925 }, { "epoch": 0.43, "grad_norm": 5.749427772911198, "learning_rate": 4.998456521739131e-06, "loss": 0.297, "step": 10950 }, { "epoch": 0.43, "grad_norm": 5.598962835292076, "learning_rate": 4.998414715719064e-06, "loss": 0.2978, "step": 10975 }, { "epoch": 0.43, "grad_norm": 6.200710604353594, "learning_rate": 4.998372909698997e-06, "loss": 0.2692, "step": 11000 }, { "epoch": 0.43, "grad_norm": 6.702673268567435, "learning_rate": 4.99833110367893e-06, "loss": 0.2944, "step": 11025 }, { "epoch": 0.43, "grad_norm": 6.625028895002169, "learning_rate": 4.998289297658863e-06, "loss": 0.2885, "step": 11050 }, { "epoch": 0.44, "grad_norm": 7.820444832366389, "learning_rate": 4.998249163879598e-06, "loss": 0.2942, "step": 11075 }, { "epoch": 0.44, "grad_norm": 5.567604834669745, "learning_rate": 4.998207357859532e-06, "loss": 0.2742, "step": 11100 }, { "epoch": 0.44, "grad_norm": 6.423735455535033, "learning_rate": 4.998165551839465e-06, "loss": 0.2846, "step": 11125 }, { "epoch": 0.44, "grad_norm": 4.999616850642319, "learning_rate": 4.998123745819398e-06, "loss": 0.2782, "step": 11150 }, { "epoch": 0.44, "grad_norm": 7.030451571678514, "learning_rate": 4.998081939799332e-06, "loss": 0.3123, "step": 11175 }, { "epoch": 0.44, "grad_norm": 7.9115247740062875, "learning_rate": 4.9980401337792644e-06, "loss": 0.2536, "step": 11200 }, { "epoch": 0.44, "grad_norm": 6.072557616156972, "learning_rate": 4.997998327759198e-06, "loss": 0.2915, "step": 11225 }, { "epoch": 0.44, "grad_norm": 6.711391798767144, "learning_rate": 4.997956521739131e-06, "loss": 0.2736, "step": 11250 }, { "epoch": 0.44, "grad_norm": 5.645455721566767, "learning_rate": 4.997914715719064e-06, "loss": 0.288, "step": 11275 }, { "epoch": 0.44, "grad_norm": 7.186122138761158, "learning_rate": 4.997872909698997e-06, "loss": 0.2697, "step": 11300 }, { "epoch": 0.45, "grad_norm": 9.00329145846993, "learning_rate": 4.9978311036789305e-06, "loss": 0.2924, "step": 11325 }, { "epoch": 0.45, "grad_norm": 6.312139760803962, "learning_rate": 4.997789297658863e-06, "loss": 0.2756, "step": 11350 }, { "epoch": 0.45, "grad_norm": 5.8874897753934965, "learning_rate": 4.997747491638797e-06, "loss": 0.2703, "step": 11375 }, { "epoch": 0.45, "grad_norm": 8.211561229810027, "learning_rate": 4.997705685618729e-06, "loss": 0.2724, "step": 11400 }, { "epoch": 0.45, "grad_norm": 6.444390049063497, "learning_rate": 4.997663879598663e-06, "loss": 0.2877, "step": 11425 }, { "epoch": 0.45, "grad_norm": 6.170573267490551, "learning_rate": 4.997622073578596e-06, "loss": 0.259, "step": 11450 }, { "epoch": 0.45, "grad_norm": 4.632322155697068, "learning_rate": 4.997580267558529e-06, "loss": 0.2823, "step": 11475 }, { "epoch": 0.45, "grad_norm": 6.152734308462785, "learning_rate": 4.997538461538462e-06, "loss": 0.2607, "step": 11500 }, { "epoch": 0.45, "grad_norm": 6.107228957537137, "learning_rate": 4.9974966555183954e-06, "loss": 0.2746, "step": 11525 }, { "epoch": 0.45, "grad_norm": 6.472917184920453, "learning_rate": 4.997454849498328e-06, "loss": 0.2728, "step": 11550 }, { "epoch": 0.46, "grad_norm": 5.83478467904063, "learning_rate": 4.997413043478262e-06, "loss": 0.2751, "step": 11575 }, { "epoch": 0.46, "grad_norm": 4.279889559058508, "learning_rate": 4.997371237458194e-06, "loss": 0.2659, "step": 11600 }, { "epoch": 0.46, "grad_norm": 7.228962539059828, "learning_rate": 4.997329431438128e-06, "loss": 0.2946, "step": 11625 }, { "epoch": 0.46, "grad_norm": 6.329870520493551, "learning_rate": 4.997287625418061e-06, "loss": 0.2752, "step": 11650 }, { "epoch": 0.46, "grad_norm": 6.644595157995969, "learning_rate": 4.997245819397994e-06, "loss": 0.2782, "step": 11675 }, { "epoch": 0.46, "grad_norm": 7.922993574833506, "learning_rate": 4.997204013377927e-06, "loss": 0.277, "step": 11700 }, { "epoch": 0.46, "grad_norm": 5.510974001369662, "learning_rate": 4.99716220735786e-06, "loss": 0.2752, "step": 11725 }, { "epoch": 0.46, "grad_norm": 4.9210546984842365, "learning_rate": 4.997120401337793e-06, "loss": 0.2634, "step": 11750 }, { "epoch": 0.46, "grad_norm": 6.079776486981066, "learning_rate": 4.997078595317727e-06, "loss": 0.2647, "step": 11775 }, { "epoch": 0.46, "grad_norm": 6.305360379026096, "learning_rate": 4.997036789297659e-06, "loss": 0.2988, "step": 11800 }, { "epoch": 0.47, "grad_norm": 7.735797578726933, "learning_rate": 4.996994983277592e-06, "loss": 0.2484, "step": 11825 }, { "epoch": 0.47, "grad_norm": 7.43170191366388, "learning_rate": 4.996953177257526e-06, "loss": 0.2619, "step": 11850 }, { "epoch": 0.47, "grad_norm": 6.402374846991611, "learning_rate": 4.996911371237458e-06, "loss": 0.2746, "step": 11875 }, { "epoch": 0.47, "grad_norm": 6.531743806890973, "learning_rate": 4.996869565217392e-06, "loss": 0.2665, "step": 11900 }, { "epoch": 0.47, "grad_norm": 4.839865759331419, "learning_rate": 4.9968277591973245e-06, "loss": 0.2702, "step": 11925 }, { "epoch": 0.47, "grad_norm": 5.825681200003497, "learning_rate": 4.996785953177258e-06, "loss": 0.2531, "step": 11950 }, { "epoch": 0.47, "grad_norm": 7.125879741698381, "learning_rate": 4.996744147157191e-06, "loss": 0.2704, "step": 11975 }, { "epoch": 0.47, "grad_norm": 5.663308145081337, "learning_rate": 4.996702341137124e-06, "loss": 0.2653, "step": 12000 }, { "epoch": 0.47, "grad_norm": 6.184543925229218, "learning_rate": 4.996660535117057e-06, "loss": 0.2786, "step": 12025 }, { "epoch": 0.47, "grad_norm": 7.865850646784692, "learning_rate": 4.996618729096991e-06, "loss": 0.2781, "step": 12050 }, { "epoch": 0.48, "grad_norm": 6.13013469457717, "learning_rate": 4.9965785953177256e-06, "loss": 0.2725, "step": 12075 }, { "epoch": 0.48, "grad_norm": 5.6434695533203305, "learning_rate": 4.996536789297659e-06, "loss": 0.2852, "step": 12100 }, { "epoch": 0.48, "grad_norm": 6.9799882279278265, "learning_rate": 4.996494983277592e-06, "loss": 0.2699, "step": 12125 }, { "epoch": 0.48, "grad_norm": 6.060616926983353, "learning_rate": 4.996453177257525e-06, "loss": 0.2752, "step": 12150 }, { "epoch": 0.48, "grad_norm": 4.746693589734832, "learning_rate": 4.996411371237458e-06, "loss": 0.2624, "step": 12175 }, { "epoch": 0.48, "grad_norm": 6.229583616728782, "learning_rate": 4.996369565217392e-06, "loss": 0.2515, "step": 12200 }, { "epoch": 0.48, "grad_norm": 4.999145743899038, "learning_rate": 4.996327759197324e-06, "loss": 0.2775, "step": 12225 }, { "epoch": 0.48, "grad_norm": 6.927319188266453, "learning_rate": 4.996285953177258e-06, "loss": 0.2846, "step": 12250 }, { "epoch": 0.48, "grad_norm": 6.874400599413486, "learning_rate": 4.996244147157191e-06, "loss": 0.2758, "step": 12275 }, { "epoch": 0.48, "grad_norm": 6.024826067369364, "learning_rate": 4.996202341137124e-06, "loss": 0.2733, "step": 12300 }, { "epoch": 0.48, "grad_norm": 5.724310991701934, "learning_rate": 4.996160535117058e-06, "loss": 0.2661, "step": 12325 }, { "epoch": 0.49, "grad_norm": 6.190847168134574, "learning_rate": 4.99611872909699e-06, "loss": 0.3009, "step": 12350 }, { "epoch": 0.49, "grad_norm": 5.981795774000632, "learning_rate": 4.996076923076924e-06, "loss": 0.2714, "step": 12375 }, { "epoch": 0.49, "grad_norm": 5.982768726819535, "learning_rate": 4.9960351170568566e-06, "loss": 0.2452, "step": 12400 }, { "epoch": 0.49, "grad_norm": 6.30852279239966, "learning_rate": 4.99599331103679e-06, "loss": 0.2584, "step": 12425 }, { "epoch": 0.49, "grad_norm": 6.858438111584143, "learning_rate": 4.995951505016723e-06, "loss": 0.2505, "step": 12450 }, { "epoch": 0.49, "grad_norm": 6.174552548362399, "learning_rate": 4.995909698996656e-06, "loss": 0.2532, "step": 12475 }, { "epoch": 0.49, "grad_norm": 7.0465648379078125, "learning_rate": 4.995867892976589e-06, "loss": 0.265, "step": 12500 }, { "epoch": 0.49, "grad_norm": 6.988153680687835, "learning_rate": 4.995826086956523e-06, "loss": 0.2432, "step": 12525 }, { "epoch": 0.49, "grad_norm": 6.264922296051626, "learning_rate": 4.995784280936455e-06, "loss": 0.2657, "step": 12550 }, { "epoch": 0.49, "grad_norm": 6.449845098914649, "learning_rate": 4.995742474916389e-06, "loss": 0.2708, "step": 12575 }, { "epoch": 0.5, "grad_norm": 7.009503417055907, "learning_rate": 4.995702341137124e-06, "loss": 0.2522, "step": 12600 }, { "epoch": 0.5, "grad_norm": 5.1804236684247345, "learning_rate": 4.995660535117057e-06, "loss": 0.2539, "step": 12625 }, { "epoch": 0.5, "grad_norm": 5.543935093810235, "learning_rate": 4.99561872909699e-06, "loss": 0.2672, "step": 12650 }, { "epoch": 0.5, "grad_norm": 6.471815945203869, "learning_rate": 4.995576923076924e-06, "loss": 0.2952, "step": 12675 }, { "epoch": 0.5, "grad_norm": 6.501833406890489, "learning_rate": 4.995535117056856e-06, "loss": 0.2667, "step": 12700 }, { "epoch": 0.5, "grad_norm": 6.343214654642585, "learning_rate": 4.99549331103679e-06, "loss": 0.2872, "step": 12725 }, { "epoch": 0.5, "grad_norm": 5.271797540434532, "learning_rate": 4.9954515050167226e-06, "loss": 0.2683, "step": 12750 }, { "epoch": 0.5, "grad_norm": 6.599139231991323, "learning_rate": 4.995409698996656e-06, "loss": 0.263, "step": 12775 }, { "epoch": 0.5, "grad_norm": 6.002476348463217, "learning_rate": 4.995367892976589e-06, "loss": 0.2783, "step": 12800 }, { "epoch": 0.5, "grad_norm": 5.525297588148386, "learning_rate": 4.995326086956522e-06, "loss": 0.2871, "step": 12825 }, { "epoch": 0.51, "grad_norm": 5.708477915446077, "learning_rate": 4.995284280936455e-06, "loss": 0.2691, "step": 12850 }, { "epoch": 0.51, "grad_norm": 6.6843516447099445, "learning_rate": 4.995242474916389e-06, "loss": 0.2871, "step": 12875 }, { "epoch": 0.51, "grad_norm": 5.115815653336138, "learning_rate": 4.995200668896321e-06, "loss": 0.2425, "step": 12900 }, { "epoch": 0.51, "grad_norm": 6.516585844765604, "learning_rate": 4.995158862876255e-06, "loss": 0.2576, "step": 12925 }, { "epoch": 0.51, "grad_norm": 5.532827849171224, "learning_rate": 4.995117056856188e-06, "loss": 0.2921, "step": 12950 }, { "epoch": 0.51, "grad_norm": 5.0067926091682144, "learning_rate": 4.99507525083612e-06, "loss": 0.2559, "step": 12975 }, { "epoch": 0.51, "grad_norm": 5.750130129977456, "learning_rate": 4.995033444816054e-06, "loss": 0.264, "step": 13000 }, { "epoch": 0.51, "grad_norm": 5.321056688567065, "learning_rate": 4.9949916387959865e-06, "loss": 0.2627, "step": 13025 }, { "epoch": 0.51, "grad_norm": 6.063866279377709, "learning_rate": 4.99494983277592e-06, "loss": 0.2723, "step": 13050 }, { "epoch": 0.51, "grad_norm": 5.685284661380941, "learning_rate": 4.994908026755853e-06, "loss": 0.2791, "step": 13075 }, { "epoch": 0.52, "grad_norm": 5.645607817332848, "learning_rate": 4.994866220735786e-06, "loss": 0.2569, "step": 13100 }, { "epoch": 0.52, "grad_norm": 5.818177466809242, "learning_rate": 4.994824414715719e-06, "loss": 0.2652, "step": 13125 }, { "epoch": 0.52, "grad_norm": 5.567556521092891, "learning_rate": 4.9947826086956525e-06, "loss": 0.2722, "step": 13150 }, { "epoch": 0.52, "grad_norm": 6.049853304178241, "learning_rate": 4.994740802675585e-06, "loss": 0.2521, "step": 13175 }, { "epoch": 0.52, "grad_norm": 5.525962556248073, "learning_rate": 4.994698996655519e-06, "loss": 0.2782, "step": 13200 }, { "epoch": 0.52, "grad_norm": 5.2979035414331035, "learning_rate": 4.9946571906354515e-06, "loss": 0.2672, "step": 13225 }, { "epoch": 0.52, "grad_norm": 5.494928513364084, "learning_rate": 4.994615384615385e-06, "loss": 0.2705, "step": 13250 }, { "epoch": 0.52, "grad_norm": 6.084597225504904, "learning_rate": 4.994573578595318e-06, "loss": 0.2883, "step": 13275 }, { "epoch": 0.52, "grad_norm": 4.072120776961249, "learning_rate": 4.994531772575251e-06, "loss": 0.2588, "step": 13300 }, { "epoch": 0.52, "grad_norm": 6.741904082206531, "learning_rate": 4.994489966555184e-06, "loss": 0.2687, "step": 13325 }, { "epoch": 0.53, "grad_norm": 6.687864286901733, "learning_rate": 4.9944481605351175e-06, "loss": 0.2618, "step": 13350 }, { "epoch": 0.53, "grad_norm": 6.279456079341556, "learning_rate": 4.994406354515051e-06, "loss": 0.2788, "step": 13375 }, { "epoch": 0.53, "grad_norm": 4.034957788594076, "learning_rate": 4.994364548494984e-06, "loss": 0.2425, "step": 13400 }, { "epoch": 0.53, "grad_norm": 5.552783387380151, "learning_rate": 4.994322742474917e-06, "loss": 0.251, "step": 13425 }, { "epoch": 0.53, "grad_norm": 7.177948746144479, "learning_rate": 4.99428093645485e-06, "loss": 0.2782, "step": 13450 }, { "epoch": 0.53, "grad_norm": 5.068512146023676, "learning_rate": 4.9942391304347835e-06, "loss": 0.2608, "step": 13475 }, { "epoch": 0.53, "grad_norm": 5.744283896920329, "learning_rate": 4.994197324414716e-06, "loss": 0.2564, "step": 13500 }, { "epoch": 0.53, "grad_norm": 7.126944376370206, "learning_rate": 4.99415551839465e-06, "loss": 0.2673, "step": 13525 }, { "epoch": 0.53, "grad_norm": 4.907130426240397, "learning_rate": 4.9941137123745825e-06, "loss": 0.2547, "step": 13550 }, { "epoch": 0.53, "grad_norm": 5.4639597192774865, "learning_rate": 4.994071906354516e-06, "loss": 0.2795, "step": 13575 }, { "epoch": 0.54, "grad_norm": 3.672162745544116, "learning_rate": 4.994030100334449e-06, "loss": 0.289, "step": 13600 }, { "epoch": 0.54, "grad_norm": 6.72396760446355, "learning_rate": 4.993988294314382e-06, "loss": 0.2711, "step": 13625 }, { "epoch": 0.54, "grad_norm": 6.555801546793623, "learning_rate": 4.993946488294315e-06, "loss": 0.2666, "step": 13650 }, { "epoch": 0.54, "grad_norm": 3.9331030728145016, "learning_rate": 4.993904682274248e-06, "loss": 0.243, "step": 13675 }, { "epoch": 0.54, "grad_norm": 5.11331974297062, "learning_rate": 4.99386287625418e-06, "loss": 0.2442, "step": 13700 }, { "epoch": 0.54, "grad_norm": 8.512882588773397, "learning_rate": 4.993821070234114e-06, "loss": 0.2622, "step": 13725 }, { "epoch": 0.54, "grad_norm": 6.5372514495460905, "learning_rate": 4.993779264214047e-06, "loss": 0.299, "step": 13750 }, { "epoch": 0.54, "grad_norm": 5.888509838266094, "learning_rate": 4.99373745819398e-06, "loss": 0.2575, "step": 13775 }, { "epoch": 0.54, "grad_norm": 5.748557291602811, "learning_rate": 4.993695652173914e-06, "loss": 0.2668, "step": 13800 }, { "epoch": 0.54, "grad_norm": 5.6182807926195215, "learning_rate": 4.993653846153846e-06, "loss": 0.2569, "step": 13825 }, { "epoch": 0.54, "grad_norm": 6.067375657021155, "learning_rate": 4.99361204013378e-06, "loss": 0.2417, "step": 13850 }, { "epoch": 0.55, "grad_norm": 7.0814238478732365, "learning_rate": 4.993570234113713e-06, "loss": 0.284, "step": 13875 }, { "epoch": 0.55, "grad_norm": 5.849375990146647, "learning_rate": 4.993528428093646e-06, "loss": 0.2586, "step": 13900 }, { "epoch": 0.55, "grad_norm": 6.420173287461275, "learning_rate": 4.993486622073579e-06, "loss": 0.2685, "step": 13925 }, { "epoch": 0.55, "grad_norm": 6.510360107745434, "learning_rate": 4.9934448160535124e-06, "loss": 0.2622, "step": 13950 }, { "epoch": 0.55, "grad_norm": 5.744495569021254, "learning_rate": 4.993403010033445e-06, "loss": 0.2654, "step": 13975 }, { "epoch": 0.55, "grad_norm": 5.241624718908685, "learning_rate": 4.993361204013379e-06, "loss": 0.2575, "step": 14000 }, { "epoch": 0.55, "grad_norm": 5.964854463517818, "learning_rate": 4.993319397993311e-06, "loss": 0.2601, "step": 14025 }, { "epoch": 0.55, "grad_norm": 5.98667622104724, "learning_rate": 4.993277591973245e-06, "loss": 0.2631, "step": 14050 }, { "epoch": 0.55, "grad_norm": 6.991177855529151, "learning_rate": 4.993235785953178e-06, "loss": 0.2711, "step": 14075 }, { "epoch": 0.55, "grad_norm": 5.205578681842165, "learning_rate": 4.993193979933111e-06, "loss": 0.3043, "step": 14100 }, { "epoch": 0.56, "grad_norm": 5.597102209370411, "learning_rate": 4.993152173913044e-06, "loss": 0.292, "step": 14125 }, { "epoch": 0.56, "grad_norm": 4.747292937249573, "learning_rate": 4.993110367892977e-06, "loss": 0.2496, "step": 14150 }, { "epoch": 0.56, "grad_norm": 5.567122798131745, "learning_rate": 4.99306856187291e-06, "loss": 0.2898, "step": 14175 }, { "epoch": 0.56, "grad_norm": 5.425348552599907, "learning_rate": 4.993026755852844e-06, "loss": 0.2675, "step": 14200 }, { "epoch": 0.56, "grad_norm": 5.923945998272752, "learning_rate": 4.992984949832776e-06, "loss": 0.2693, "step": 14225 }, { "epoch": 0.56, "grad_norm": 6.085703696247624, "learning_rate": 4.99294314381271e-06, "loss": 0.2661, "step": 14250 }, { "epoch": 0.56, "grad_norm": 5.73816088361352, "learning_rate": 4.992901337792643e-06, "loss": 0.2694, "step": 14275 }, { "epoch": 0.56, "grad_norm": 6.809164188200779, "learning_rate": 4.992859531772576e-06, "loss": 0.2719, "step": 14300 }, { "epoch": 0.56, "grad_norm": 6.3598512977073, "learning_rate": 4.992817725752509e-06, "loss": 0.2774, "step": 14325 }, { "epoch": 0.56, "grad_norm": 5.216046308562436, "learning_rate": 4.992775919732442e-06, "loss": 0.247, "step": 14350 }, { "epoch": 0.57, "grad_norm": 4.329199858263562, "learning_rate": 4.992734113712375e-06, "loss": 0.2868, "step": 14375 }, { "epoch": 0.57, "grad_norm": 5.210460931264835, "learning_rate": 4.992692307692308e-06, "loss": 0.2683, "step": 14400 }, { "epoch": 0.57, "grad_norm": 5.7667612517073765, "learning_rate": 4.992650501672241e-06, "loss": 0.2535, "step": 14425 }, { "epoch": 0.57, "grad_norm": 6.9612302730938405, "learning_rate": 4.992608695652174e-06, "loss": 0.2529, "step": 14450 }, { "epoch": 0.57, "grad_norm": 6.596575617259898, "learning_rate": 4.9925668896321076e-06, "loss": 0.253, "step": 14475 }, { "epoch": 0.57, "grad_norm": 4.62301372614655, "learning_rate": 4.99252508361204e-06, "loss": 0.2504, "step": 14500 }, { "epoch": 0.57, "grad_norm": 5.950147967461413, "learning_rate": 4.992483277591974e-06, "loss": 0.2682, "step": 14525 }, { "epoch": 0.57, "grad_norm": 6.881464976782473, "learning_rate": 4.9924414715719065e-06, "loss": 0.2473, "step": 14550 }, { "epoch": 0.57, "grad_norm": 6.705896744318513, "learning_rate": 4.99239966555184e-06, "loss": 0.2468, "step": 14575 }, { "epoch": 0.57, "grad_norm": 3.6827213444842526, "learning_rate": 4.992357859531773e-06, "loss": 0.3114, "step": 14600 }, { "epoch": 0.58, "grad_norm": 6.249966633309746, "learning_rate": 4.992317725752509e-06, "loss": 0.2633, "step": 14625 }, { "epoch": 0.58, "grad_norm": 5.42278315835385, "learning_rate": 4.992275919732441e-06, "loss": 0.2854, "step": 14650 }, { "epoch": 0.58, "grad_norm": 5.655143231460296, "learning_rate": 4.992234113712375e-06, "loss": 0.2655, "step": 14675 }, { "epoch": 0.58, "grad_norm": 7.858506902274973, "learning_rate": 4.9921923076923075e-06, "loss": 0.2913, "step": 14700 }, { "epoch": 0.58, "grad_norm": 5.606517868907838, "learning_rate": 4.992150501672241e-06, "loss": 0.2562, "step": 14725 }, { "epoch": 0.58, "grad_norm": 7.8892802718869275, "learning_rate": 4.992108695652174e-06, "loss": 0.2653, "step": 14750 }, { "epoch": 0.58, "grad_norm": 6.395026098564766, "learning_rate": 4.992066889632107e-06, "loss": 0.2717, "step": 14775 }, { "epoch": 0.58, "grad_norm": 6.043189975643226, "learning_rate": 4.99202508361204e-06, "loss": 0.2587, "step": 14800 }, { "epoch": 0.58, "grad_norm": 7.36609725720899, "learning_rate": 4.9919832775919736e-06, "loss": 0.272, "step": 14825 }, { "epoch": 0.58, "grad_norm": 4.645686026598199, "learning_rate": 4.991941471571906e-06, "loss": 0.2333, "step": 14850 }, { "epoch": 0.59, "grad_norm": 5.55507184083137, "learning_rate": 4.99189966555184e-06, "loss": 0.2509, "step": 14875 }, { "epoch": 0.59, "grad_norm": 5.345846578476869, "learning_rate": 4.991857859531773e-06, "loss": 0.2613, "step": 14900 }, { "epoch": 0.59, "grad_norm": 5.935348438197945, "learning_rate": 4.991816053511706e-06, "loss": 0.2816, "step": 14925 }, { "epoch": 0.59, "grad_norm": 5.803960018432536, "learning_rate": 4.99177424749164e-06, "loss": 0.2548, "step": 14950 }, { "epoch": 0.59, "grad_norm": 4.42912623970475, "learning_rate": 4.991732441471572e-06, "loss": 0.2529, "step": 14975 }, { "epoch": 0.59, "grad_norm": 4.975233188396705, "learning_rate": 4.991690635451506e-06, "loss": 0.2881, "step": 15000 }, { "epoch": 0.59, "grad_norm": 5.22507074373668, "learning_rate": 4.9916488294314385e-06, "loss": 0.2552, "step": 15025 }, { "epoch": 0.59, "grad_norm": 6.14001014159123, "learning_rate": 4.991607023411372e-06, "loss": 0.2684, "step": 15050 }, { "epoch": 0.59, "grad_norm": 6.034722792372274, "learning_rate": 4.991565217391305e-06, "loss": 0.2564, "step": 15075 }, { "epoch": 0.59, "grad_norm": 7.342705074428043, "learning_rate": 4.991523411371238e-06, "loss": 0.2542, "step": 15100 }, { "epoch": 0.6, "grad_norm": 5.154437791807671, "learning_rate": 4.991481605351171e-06, "loss": 0.2728, "step": 15125 }, { "epoch": 0.6, "grad_norm": 4.466502948123444, "learning_rate": 4.9914397993311046e-06, "loss": 0.2759, "step": 15150 }, { "epoch": 0.6, "grad_norm": 5.730270409539509, "learning_rate": 4.991397993311037e-06, "loss": 0.2671, "step": 15175 }, { "epoch": 0.6, "grad_norm": 4.13478451695111, "learning_rate": 4.991356187290971e-06, "loss": 0.2639, "step": 15200 }, { "epoch": 0.6, "grad_norm": 6.63611014055058, "learning_rate": 4.9913143812709035e-06, "loss": 0.2511, "step": 15225 }, { "epoch": 0.6, "grad_norm": 6.486679281838134, "learning_rate": 4.991272575250837e-06, "loss": 0.2615, "step": 15250 }, { "epoch": 0.6, "grad_norm": 6.609027754294971, "learning_rate": 4.99123076923077e-06, "loss": 0.2521, "step": 15275 }, { "epoch": 0.6, "grad_norm": 7.300744039135444, "learning_rate": 4.991188963210703e-06, "loss": 0.2734, "step": 15300 }, { "epoch": 0.6, "grad_norm": 5.302779818219576, "learning_rate": 4.991147157190635e-06, "loss": 0.2771, "step": 15325 }, { "epoch": 0.6, "grad_norm": 6.909444498848482, "learning_rate": 4.991105351170569e-06, "loss": 0.2675, "step": 15350 }, { "epoch": 0.6, "grad_norm": 5.43122899967809, "learning_rate": 4.991063545150502e-06, "loss": 0.2618, "step": 15375 }, { "epoch": 0.61, "grad_norm": 6.228246630467435, "learning_rate": 4.991021739130435e-06, "loss": 0.2687, "step": 15400 }, { "epoch": 0.61, "grad_norm": 6.064148750557926, "learning_rate": 4.9909799331103685e-06, "loss": 0.2428, "step": 15425 }, { "epoch": 0.61, "grad_norm": 5.255096086616142, "learning_rate": 4.990938127090301e-06, "loss": 0.2667, "step": 15450 }, { "epoch": 0.61, "grad_norm": 7.709327247522713, "learning_rate": 4.990896321070235e-06, "loss": 0.2732, "step": 15475 }, { "epoch": 0.61, "grad_norm": 6.184508161358503, "learning_rate": 4.9908545150501674e-06, "loss": 0.2719, "step": 15500 }, { "epoch": 0.61, "grad_norm": 4.922987038531951, "learning_rate": 4.990812709030101e-06, "loss": 0.2797, "step": 15525 }, { "epoch": 0.61, "grad_norm": 5.626376622087191, "learning_rate": 4.990770903010034e-06, "loss": 0.2479, "step": 15550 }, { "epoch": 0.61, "grad_norm": 5.514060439157896, "learning_rate": 4.990729096989967e-06, "loss": 0.2405, "step": 15575 }, { "epoch": 0.61, "grad_norm": 5.373488145308544, "learning_rate": 4.9906872909699e-06, "loss": 0.248, "step": 15600 }, { "epoch": 0.61, "grad_norm": 6.741275477526847, "learning_rate": 4.990647157190636e-06, "loss": 0.253, "step": 15625 }, { "epoch": 0.62, "grad_norm": 6.095305164848746, "learning_rate": 4.9906053511705685e-06, "loss": 0.2635, "step": 15650 }, { "epoch": 0.62, "grad_norm": 6.587285383976124, "learning_rate": 4.990563545150502e-06, "loss": 0.2713, "step": 15675 }, { "epoch": 0.62, "grad_norm": 5.9812853760264115, "learning_rate": 4.990521739130435e-06, "loss": 0.2796, "step": 15700 }, { "epoch": 0.62, "grad_norm": 6.944806936412515, "learning_rate": 4.990479933110368e-06, "loss": 0.2599, "step": 15725 }, { "epoch": 0.62, "grad_norm": 6.171130869044126, "learning_rate": 4.990438127090301e-06, "loss": 0.2489, "step": 15750 }, { "epoch": 0.62, "grad_norm": 6.206236751191819, "learning_rate": 4.9903963210702345e-06, "loss": 0.2717, "step": 15775 }, { "epoch": 0.62, "grad_norm": 6.974957589959287, "learning_rate": 4.990354515050167e-06, "loss": 0.2632, "step": 15800 }, { "epoch": 0.62, "grad_norm": 4.464737013981678, "learning_rate": 4.990312709030101e-06, "loss": 0.2562, "step": 15825 }, { "epoch": 0.62, "grad_norm": 7.039455268506516, "learning_rate": 4.9902709030100334e-06, "loss": 0.2589, "step": 15850 }, { "epoch": 0.62, "grad_norm": 6.93453838570339, "learning_rate": 4.990229096989967e-06, "loss": 0.254, "step": 15875 }, { "epoch": 0.63, "grad_norm": 7.429570899110624, "learning_rate": 4.9901872909699e-06, "loss": 0.2755, "step": 15900 }, { "epoch": 0.63, "grad_norm": 6.473116184036116, "learning_rate": 4.990145484949833e-06, "loss": 0.255, "step": 15925 }, { "epoch": 0.63, "grad_norm": 5.245501608124624, "learning_rate": 4.990103678929766e-06, "loss": 0.2323, "step": 15950 }, { "epoch": 0.63, "grad_norm": 5.1289839856825346, "learning_rate": 4.9900618729096995e-06, "loss": 0.2623, "step": 15975 }, { "epoch": 0.63, "grad_norm": 4.717814516086991, "learning_rate": 4.990020066889632e-06, "loss": 0.2414, "step": 16000 }, { "epoch": 0.63, "grad_norm": 5.751851025204562, "learning_rate": 4.989978260869566e-06, "loss": 0.2621, "step": 16025 }, { "epoch": 0.63, "grad_norm": 4.6353456530210755, "learning_rate": 4.989936454849499e-06, "loss": 0.2621, "step": 16050 }, { "epoch": 0.63, "grad_norm": 6.035407794665361, "learning_rate": 4.989894648829432e-06, "loss": 0.2706, "step": 16075 }, { "epoch": 0.63, "grad_norm": 7.175886486573118, "learning_rate": 4.9898528428093655e-06, "loss": 0.2556, "step": 16100 }, { "epoch": 0.63, "grad_norm": 5.306973626946458, "learning_rate": 4.989811036789298e-06, "loss": 0.2532, "step": 16125 }, { "epoch": 0.64, "grad_norm": 5.739524436385124, "learning_rate": 4.989769230769232e-06, "loss": 0.243, "step": 16150 }, { "epoch": 0.64, "grad_norm": 5.743554132202793, "learning_rate": 4.9897274247491644e-06, "loss": 0.2571, "step": 16175 }, { "epoch": 0.64, "grad_norm": 5.507860974296257, "learning_rate": 4.989685618729098e-06, "loss": 0.2498, "step": 16200 }, { "epoch": 0.64, "grad_norm": 7.521567618683169, "learning_rate": 4.989643812709031e-06, "loss": 0.2406, "step": 16225 }, { "epoch": 0.64, "grad_norm": 5.287306100434563, "learning_rate": 4.989602006688964e-06, "loss": 0.2548, "step": 16250 }, { "epoch": 0.64, "grad_norm": 5.605554498785768, "learning_rate": 4.989560200668896e-06, "loss": 0.2735, "step": 16275 }, { "epoch": 0.64, "grad_norm": 7.763241090962355, "learning_rate": 4.98951839464883e-06, "loss": 0.2306, "step": 16300 }, { "epoch": 0.64, "grad_norm": 5.236303105187178, "learning_rate": 4.989476588628762e-06, "loss": 0.2406, "step": 16325 }, { "epoch": 0.64, "grad_norm": 6.893192357925432, "learning_rate": 4.989434782608696e-06, "loss": 0.2643, "step": 16350 }, { "epoch": 0.64, "grad_norm": 5.7801672791546945, "learning_rate": 4.9893929765886286e-06, "loss": 0.2557, "step": 16375 }, { "epoch": 0.65, "grad_norm": 6.509427372226329, "learning_rate": 4.989351170568562e-06, "loss": 0.2408, "step": 16400 }, { "epoch": 0.65, "grad_norm": 6.145083984273696, "learning_rate": 4.989309364548495e-06, "loss": 0.2475, "step": 16425 }, { "epoch": 0.65, "grad_norm": 4.961929779970909, "learning_rate": 4.989267558528428e-06, "loss": 0.2543, "step": 16450 }, { "epoch": 0.65, "grad_norm": 6.237616244924249, "learning_rate": 4.989225752508362e-06, "loss": 0.2693, "step": 16475 }, { "epoch": 0.65, "grad_norm": 5.071941176628237, "learning_rate": 4.989183946488295e-06, "loss": 0.2672, "step": 16500 }, { "epoch": 0.65, "grad_norm": 5.99881469452368, "learning_rate": 4.989142140468228e-06, "loss": 0.252, "step": 16525 }, { "epoch": 0.65, "grad_norm": 6.824243326080856, "learning_rate": 4.989100334448161e-06, "loss": 0.2457, "step": 16550 }, { "epoch": 0.65, "grad_norm": 6.86991972909777, "learning_rate": 4.989058528428094e-06, "loss": 0.266, "step": 16575 }, { "epoch": 0.65, "grad_norm": 5.545283659754445, "learning_rate": 4.989016722408027e-06, "loss": 0.2781, "step": 16600 }, { "epoch": 0.65, "grad_norm": 4.925067126921487, "learning_rate": 4.988976588628763e-06, "loss": 0.2954, "step": 16625 }, { "epoch": 0.66, "grad_norm": 5.58821701479221, "learning_rate": 4.988934782608696e-06, "loss": 0.2494, "step": 16650 }, { "epoch": 0.66, "grad_norm": 5.185520155813638, "learning_rate": 4.988892976588629e-06, "loss": 0.2307, "step": 16675 }, { "epoch": 0.66, "grad_norm": 5.039375868942344, "learning_rate": 4.988851170568562e-06, "loss": 0.2454, "step": 16700 }, { "epoch": 0.66, "grad_norm": 5.8875232231482615, "learning_rate": 4.988809364548495e-06, "loss": 0.2556, "step": 16725 }, { "epoch": 0.66, "grad_norm": 4.585235280866713, "learning_rate": 4.988767558528428e-06, "loss": 0.2687, "step": 16750 }, { "epoch": 0.66, "grad_norm": 5.796602212489863, "learning_rate": 4.988725752508362e-06, "loss": 0.2573, "step": 16775 }, { "epoch": 0.66, "grad_norm": 6.1627805195183685, "learning_rate": 4.988683946488294e-06, "loss": 0.2932, "step": 16800 }, { "epoch": 0.66, "grad_norm": 5.552009029354894, "learning_rate": 4.988642140468228e-06, "loss": 0.2707, "step": 16825 }, { "epoch": 0.66, "grad_norm": 5.466725002249184, "learning_rate": 4.988600334448161e-06, "loss": 0.2336, "step": 16850 }, { "epoch": 0.66, "grad_norm": 5.186062157421561, "learning_rate": 4.988558528428094e-06, "loss": 0.2538, "step": 16875 }, { "epoch": 0.66, "grad_norm": 6.080865810052987, "learning_rate": 4.988516722408027e-06, "loss": 0.2464, "step": 16900 }, { "epoch": 0.67, "grad_norm": 6.570893467223135, "learning_rate": 4.98847491638796e-06, "loss": 0.2555, "step": 16925 }, { "epoch": 0.67, "grad_norm": 5.9998578151996504, "learning_rate": 4.988433110367893e-06, "loss": 0.2685, "step": 16950 }, { "epoch": 0.67, "grad_norm": 5.714104620985236, "learning_rate": 4.988391304347827e-06, "loss": 0.2579, "step": 16975 }, { "epoch": 0.67, "grad_norm": 6.204684812582076, "learning_rate": 4.988349498327759e-06, "loss": 0.2814, "step": 17000 }, { "epoch": 0.67, "grad_norm": 7.184079656953484, "learning_rate": 4.988307692307693e-06, "loss": 0.2413, "step": 17025 }, { "epoch": 0.67, "grad_norm": 5.268387433526686, "learning_rate": 4.988265886287626e-06, "loss": 0.2817, "step": 17050 }, { "epoch": 0.67, "grad_norm": 6.257289805906091, "learning_rate": 4.988224080267559e-06, "loss": 0.2595, "step": 17075 }, { "epoch": 0.67, "grad_norm": 5.0383919249393045, "learning_rate": 4.988182274247492e-06, "loss": 0.2666, "step": 17100 }, { "epoch": 0.67, "grad_norm": 5.291305025444263, "learning_rate": 4.988140468227425e-06, "loss": 0.2654, "step": 17125 }, { "epoch": 0.67, "grad_norm": 5.340130958791915, "learning_rate": 4.988098662207359e-06, "loss": 0.2525, "step": 17150 }, { "epoch": 0.68, "grad_norm": 4.911613542246811, "learning_rate": 4.988056856187292e-06, "loss": 0.2788, "step": 17175 }, { "epoch": 0.68, "grad_norm": 5.477361220204573, "learning_rate": 4.988015050167225e-06, "loss": 0.2522, "step": 17200 }, { "epoch": 0.68, "grad_norm": 4.792368739302577, "learning_rate": 4.987973244147157e-06, "loss": 0.2724, "step": 17225 }, { "epoch": 0.68, "grad_norm": 5.35695367909829, "learning_rate": 4.9879314381270906e-06, "loss": 0.2761, "step": 17250 }, { "epoch": 0.68, "grad_norm": 5.629047017884108, "learning_rate": 4.987889632107023e-06, "loss": 0.2635, "step": 17275 }, { "epoch": 0.68, "grad_norm": 5.331465117440273, "learning_rate": 4.987847826086957e-06, "loss": 0.2414, "step": 17300 }, { "epoch": 0.68, "grad_norm": 5.367627157757305, "learning_rate": 4.9878060200668895e-06, "loss": 0.2572, "step": 17325 }, { "epoch": 0.68, "grad_norm": 5.1172264981864295, "learning_rate": 4.987764214046823e-06, "loss": 0.2577, "step": 17350 }, { "epoch": 0.68, "grad_norm": 5.355305084531797, "learning_rate": 4.987724080267559e-06, "loss": 0.2792, "step": 17375 }, { "epoch": 0.68, "grad_norm": 5.842697216641398, "learning_rate": 4.987682274247492e-06, "loss": 0.2617, "step": 17400 }, { "epoch": 0.69, "grad_norm": 5.743056099274135, "learning_rate": 4.987640468227425e-06, "loss": 0.2488, "step": 17425 }, { "epoch": 0.69, "grad_norm": 8.36774370696684, "learning_rate": 4.987598662207358e-06, "loss": 0.2777, "step": 17450 }, { "epoch": 0.69, "grad_norm": 5.700174425799506, "learning_rate": 4.987556856187291e-06, "loss": 0.2445, "step": 17475 }, { "epoch": 0.69, "grad_norm": 5.5291577524920905, "learning_rate": 4.987515050167224e-06, "loss": 0.2391, "step": 17500 }, { "epoch": 0.69, "grad_norm": 5.0434604227152455, "learning_rate": 4.987473244147158e-06, "loss": 0.2868, "step": 17525 }, { "epoch": 0.69, "grad_norm": 6.471288503953715, "learning_rate": 4.98743143812709e-06, "loss": 0.294, "step": 17550 }, { "epoch": 0.69, "grad_norm": 5.967607026828377, "learning_rate": 4.987389632107024e-06, "loss": 0.2582, "step": 17575 }, { "epoch": 0.69, "grad_norm": 5.493557381929855, "learning_rate": 4.9873478260869566e-06, "loss": 0.2337, "step": 17600 }, { "epoch": 0.69, "grad_norm": 5.557469040585438, "learning_rate": 4.98730602006689e-06, "loss": 0.2703, "step": 17625 }, { "epoch": 0.69, "grad_norm": 5.523565121186858, "learning_rate": 4.987264214046823e-06, "loss": 0.2577, "step": 17650 }, { "epoch": 0.7, "grad_norm": 6.148642857874085, "learning_rate": 4.987222408026756e-06, "loss": 0.252, "step": 17675 }, { "epoch": 0.7, "grad_norm": 5.875580443467786, "learning_rate": 4.987180602006689e-06, "loss": 0.278, "step": 17700 }, { "epoch": 0.7, "grad_norm": 5.99454936628081, "learning_rate": 4.987138795986623e-06, "loss": 0.2721, "step": 17725 }, { "epoch": 0.7, "grad_norm": 4.555180518575604, "learning_rate": 4.987096989966555e-06, "loss": 0.2533, "step": 17750 }, { "epoch": 0.7, "grad_norm": 5.395100579611774, "learning_rate": 4.987055183946489e-06, "loss": 0.2625, "step": 17775 }, { "epoch": 0.7, "grad_norm": 5.563405837664269, "learning_rate": 4.9870133779264215e-06, "loss": 0.2622, "step": 17800 }, { "epoch": 0.7, "grad_norm": 6.043632738488831, "learning_rate": 4.986971571906355e-06, "loss": 0.2845, "step": 17825 }, { "epoch": 0.7, "grad_norm": 7.119469778219465, "learning_rate": 4.986929765886288e-06, "loss": 0.251, "step": 17850 }, { "epoch": 0.7, "grad_norm": 8.75507347596451, "learning_rate": 4.986887959866221e-06, "loss": 0.251, "step": 17875 }, { "epoch": 0.7, "grad_norm": 5.826211048039959, "learning_rate": 4.986846153846154e-06, "loss": 0.2691, "step": 17900 }, { "epoch": 0.71, "grad_norm": 5.010072111561531, "learning_rate": 4.9868043478260876e-06, "loss": 0.2366, "step": 17925 }, { "epoch": 0.71, "grad_norm": 4.859353986359138, "learning_rate": 4.98676254180602e-06, "loss": 0.2688, "step": 17950 }, { "epoch": 0.71, "grad_norm": 4.436740266722265, "learning_rate": 4.986720735785954e-06, "loss": 0.2287, "step": 17975 }, { "epoch": 0.71, "grad_norm": 7.699368648294974, "learning_rate": 4.9866789297658865e-06, "loss": 0.2623, "step": 18000 }, { "epoch": 0.71, "grad_norm": 4.286788999671542, "learning_rate": 4.98663712374582e-06, "loss": 0.2675, "step": 18025 }, { "epoch": 0.71, "grad_norm": 6.383383566653747, "learning_rate": 4.986595317725753e-06, "loss": 0.2483, "step": 18050 }, { "epoch": 0.71, "grad_norm": 6.558217801754021, "learning_rate": 4.986553511705686e-06, "loss": 0.2542, "step": 18075 }, { "epoch": 0.71, "grad_norm": 4.38291780085364, "learning_rate": 4.986511705685619e-06, "loss": 0.2497, "step": 18100 }, { "epoch": 0.71, "grad_norm": 5.520472839091616, "learning_rate": 4.9864698996655525e-06, "loss": 0.2574, "step": 18125 }, { "epoch": 0.71, "grad_norm": 6.77899108452888, "learning_rate": 4.986428093645485e-06, "loss": 0.264, "step": 18150 }, { "epoch": 0.72, "grad_norm": 6.778959356886794, "learning_rate": 4.986386287625418e-06, "loss": 0.2524, "step": 18175 }, { "epoch": 0.72, "grad_norm": 5.918405607392925, "learning_rate": 4.9863444816053515e-06, "loss": 0.2635, "step": 18200 }, { "epoch": 0.72, "grad_norm": 5.031548207897417, "learning_rate": 4.986302675585284e-06, "loss": 0.2426, "step": 18225 }, { "epoch": 0.72, "grad_norm": 5.352776823368371, "learning_rate": 4.986260869565218e-06, "loss": 0.2621, "step": 18250 }, { "epoch": 0.72, "grad_norm": 6.416316266136449, "learning_rate": 4.9862190635451504e-06, "loss": 0.2337, "step": 18275 }, { "epoch": 0.72, "grad_norm": 5.902376325347049, "learning_rate": 4.986177257525084e-06, "loss": 0.2764, "step": 18300 }, { "epoch": 0.72, "grad_norm": 5.577162546382801, "learning_rate": 4.986135451505017e-06, "loss": 0.2647, "step": 18325 }, { "epoch": 0.72, "grad_norm": 5.429483200140283, "learning_rate": 4.98609364548495e-06, "loss": 0.2539, "step": 18350 }, { "epoch": 0.72, "grad_norm": 4.472015436354258, "learning_rate": 4.986051839464883e-06, "loss": 0.2194, "step": 18375 }, { "epoch": 0.72, "grad_norm": 5.304795171586728, "learning_rate": 4.9860100334448165e-06, "loss": 0.2506, "step": 18400 }, { "epoch": 0.72, "grad_norm": 5.5728897025692525, "learning_rate": 4.985968227424749e-06, "loss": 0.2471, "step": 18425 }, { "epoch": 0.73, "grad_norm": 5.522313447060331, "learning_rate": 4.985926421404683e-06, "loss": 0.2708, "step": 18450 }, { "epoch": 0.73, "grad_norm": 6.393407912312494, "learning_rate": 4.985884615384615e-06, "loss": 0.2504, "step": 18475 }, { "epoch": 0.73, "grad_norm": 4.290470046509902, "learning_rate": 4.985842809364549e-06, "loss": 0.2418, "step": 18500 }, { "epoch": 0.73, "grad_norm": 3.8204122499023567, "learning_rate": 4.985801003344482e-06, "loss": 0.2303, "step": 18525 }, { "epoch": 0.73, "grad_norm": 4.705257208518913, "learning_rate": 4.985759197324415e-06, "loss": 0.2552, "step": 18550 }, { "epoch": 0.73, "grad_norm": 6.239833066925982, "learning_rate": 4.985717391304348e-06, "loss": 0.2349, "step": 18575 }, { "epoch": 0.73, "grad_norm": 5.007178185695814, "learning_rate": 4.9856755852842814e-06, "loss": 0.2746, "step": 18600 }, { "epoch": 0.73, "grad_norm": 5.604459846104763, "learning_rate": 4.985633779264214e-06, "loss": 0.2556, "step": 18625 }, { "epoch": 0.73, "grad_norm": 4.923135167645596, "learning_rate": 4.985591973244148e-06, "loss": 0.2602, "step": 18650 }, { "epoch": 0.73, "grad_norm": 6.086240722877576, "learning_rate": 4.985550167224081e-06, "loss": 0.2445, "step": 18675 }, { "epoch": 0.74, "grad_norm": 6.46646029604406, "learning_rate": 4.985508361204014e-06, "loss": 0.253, "step": 18700 }, { "epoch": 0.74, "grad_norm": 5.479558717023913, "learning_rate": 4.9854665551839475e-06, "loss": 0.2642, "step": 18725 }, { "epoch": 0.74, "grad_norm": 6.161950185397198, "learning_rate": 4.98542474916388e-06, "loss": 0.2664, "step": 18750 }, { "epoch": 0.74, "grad_norm": 7.149826660631525, "learning_rate": 4.985382943143814e-06, "loss": 0.2735, "step": 18775 }, { "epoch": 0.74, "grad_norm": 5.098059065958003, "learning_rate": 4.985341137123746e-06, "loss": 0.2626, "step": 18800 }, { "epoch": 0.74, "grad_norm": 6.7322272581947376, "learning_rate": 4.98529933110368e-06, "loss": 0.2706, "step": 18825 }, { "epoch": 0.74, "grad_norm": 6.485082925954047, "learning_rate": 4.985257525083613e-06, "loss": 0.265, "step": 18850 }, { "epoch": 0.74, "grad_norm": 3.8109358920665106, "learning_rate": 4.985215719063545e-06, "loss": 0.2364, "step": 18875 }, { "epoch": 0.74, "grad_norm": 5.94763670161086, "learning_rate": 4.985173913043478e-06, "loss": 0.238, "step": 18900 }, { "epoch": 0.74, "grad_norm": 6.52671801682198, "learning_rate": 4.985132107023412e-06, "loss": 0.2535, "step": 18925 }, { "epoch": 0.75, "grad_norm": 5.79617606507355, "learning_rate": 4.985090301003344e-06, "loss": 0.2319, "step": 18950 }, { "epoch": 0.75, "grad_norm": 4.661160862165919, "learning_rate": 4.985048494983278e-06, "loss": 0.25, "step": 18975 }, { "epoch": 0.75, "grad_norm": 5.823787487368952, "learning_rate": 4.9850066889632105e-06, "loss": 0.2494, "step": 19000 }, { "epoch": 0.75, "grad_norm": 6.041962533822711, "learning_rate": 4.984964882943144e-06, "loss": 0.2534, "step": 19025 }, { "epoch": 0.75, "grad_norm": 6.137209039165049, "learning_rate": 4.984923076923077e-06, "loss": 0.2485, "step": 19050 }, { "epoch": 0.75, "grad_norm": 6.157303270364183, "learning_rate": 4.98488127090301e-06, "loss": 0.2495, "step": 19075 }, { "epoch": 0.75, "grad_norm": 5.804140711641237, "learning_rate": 4.984839464882944e-06, "loss": 0.2547, "step": 19100 }, { "epoch": 0.75, "grad_norm": 7.783724624022202, "learning_rate": 4.9847976588628766e-06, "loss": 0.2346, "step": 19125 }, { "epoch": 0.75, "grad_norm": 5.69877397127529, "learning_rate": 4.98475585284281e-06, "loss": 0.2892, "step": 19150 }, { "epoch": 0.75, "grad_norm": 6.811523610920984, "learning_rate": 4.984714046822743e-06, "loss": 0.2576, "step": 19175 }, { "epoch": 0.76, "grad_norm": 5.632437319589204, "learning_rate": 4.984672240802676e-06, "loss": 0.2688, "step": 19200 }, { "epoch": 0.76, "grad_norm": 5.287424341539971, "learning_rate": 4.984630434782609e-06, "loss": 0.2582, "step": 19225 }, { "epoch": 0.76, "grad_norm": 5.27438775796841, "learning_rate": 4.984588628762543e-06, "loss": 0.2517, "step": 19250 }, { "epoch": 0.76, "grad_norm": 6.312578396357088, "learning_rate": 4.984546822742475e-06, "loss": 0.2659, "step": 19275 }, { "epoch": 0.76, "grad_norm": 6.561038478654943, "learning_rate": 4.984505016722409e-06, "loss": 0.2669, "step": 19300 }, { "epoch": 0.76, "grad_norm": 5.883831211163112, "learning_rate": 4.9844632107023416e-06, "loss": 0.2644, "step": 19325 }, { "epoch": 0.76, "grad_norm": 4.7206392552751435, "learning_rate": 4.984421404682275e-06, "loss": 0.2419, "step": 19350 }, { "epoch": 0.76, "grad_norm": 5.772561430178686, "learning_rate": 4.98438127090301e-06, "loss": 0.2545, "step": 19375 }, { "epoch": 0.76, "grad_norm": 5.808370166675897, "learning_rate": 4.984339464882944e-06, "loss": 0.2463, "step": 19400 }, { "epoch": 0.76, "grad_norm": 5.8556307395440825, "learning_rate": 4.984297658862876e-06, "loss": 0.2811, "step": 19425 }, { "epoch": 0.77, "grad_norm": 6.305858961213936, "learning_rate": 4.98425585284281e-06, "loss": 0.2662, "step": 19450 }, { "epoch": 0.77, "grad_norm": 6.59470367618029, "learning_rate": 4.9842140468227426e-06, "loss": 0.2878, "step": 19475 }, { "epoch": 0.77, "grad_norm": 6.3608941320554395, "learning_rate": 4.984172240802676e-06, "loss": 0.2566, "step": 19500 }, { "epoch": 0.77, "grad_norm": 5.757478418973163, "learning_rate": 4.984130434782609e-06, "loss": 0.2448, "step": 19525 }, { "epoch": 0.77, "grad_norm": 4.286220221553772, "learning_rate": 4.984088628762542e-06, "loss": 0.2359, "step": 19550 }, { "epoch": 0.77, "grad_norm": 5.690642809026132, "learning_rate": 4.984046822742475e-06, "loss": 0.2417, "step": 19575 }, { "epoch": 0.77, "grad_norm": 4.924557118890291, "learning_rate": 4.984005016722409e-06, "loss": 0.2663, "step": 19600 }, { "epoch": 0.77, "grad_norm": 6.77094033240899, "learning_rate": 4.983963210702341e-06, "loss": 0.2548, "step": 19625 }, { "epoch": 0.77, "grad_norm": 6.358984573786143, "learning_rate": 4.983921404682275e-06, "loss": 0.2619, "step": 19650 }, { "epoch": 0.77, "grad_norm": 5.7311555484725965, "learning_rate": 4.9838795986622075e-06, "loss": 0.2821, "step": 19675 }, { "epoch": 0.78, "grad_norm": 5.141037303387008, "learning_rate": 4.983837792642141e-06, "loss": 0.2697, "step": 19700 }, { "epoch": 0.78, "grad_norm": 5.218327752735989, "learning_rate": 4.983795986622074e-06, "loss": 0.2596, "step": 19725 }, { "epoch": 0.78, "grad_norm": 6.478272133179351, "learning_rate": 4.983754180602007e-06, "loss": 0.2788, "step": 19750 }, { "epoch": 0.78, "grad_norm": 7.3508943589897315, "learning_rate": 4.983712374581941e-06, "loss": 0.2787, "step": 19775 }, { "epoch": 0.78, "grad_norm": 5.109200564645706, "learning_rate": 4.983670568561874e-06, "loss": 0.226, "step": 19800 }, { "epoch": 0.78, "grad_norm": 5.18181645808782, "learning_rate": 4.983628762541806e-06, "loss": 0.2721, "step": 19825 }, { "epoch": 0.78, "grad_norm": 5.996776147055529, "learning_rate": 4.983586956521739e-06, "loss": 0.2465, "step": 19850 }, { "epoch": 0.78, "grad_norm": 5.761328825995187, "learning_rate": 4.9835451505016725e-06, "loss": 0.2453, "step": 19875 }, { "epoch": 0.78, "grad_norm": 5.828048723654273, "learning_rate": 4.983503344481605e-06, "loss": 0.2522, "step": 19900 }, { "epoch": 0.78, "grad_norm": 5.580918856632557, "learning_rate": 4.983461538461539e-06, "loss": 0.2744, "step": 19925 }, { "epoch": 0.78, "grad_norm": 6.91882009627752, "learning_rate": 4.9834197324414715e-06, "loss": 0.2666, "step": 19950 }, { "epoch": 0.79, "grad_norm": 5.32076952903993, "learning_rate": 4.983377926421405e-06, "loss": 0.265, "step": 19975 }, { "epoch": 0.79, "grad_norm": 4.971506292016713, "learning_rate": 4.983336120401338e-06, "loss": 0.2646, "step": 20000 }, { "epoch": 0.79, "eval_loss": 0.4140625, "eval_runtime": 11517.8974, "eval_samples_per_second": 0.822, "eval_steps_per_second": 0.051, "eval_wer": 0.1283560960700095, "step": 20000 }, { "epoch": 0.79, "grad_norm": 7.597765424127864, "learning_rate": 4.983294314381271e-06, "loss": 0.2477, "step": 20025 }, { "epoch": 0.79, "grad_norm": 6.125850278326169, "learning_rate": 4.983252508361204e-06, "loss": 0.2679, "step": 20050 }, { "epoch": 0.79, "grad_norm": 5.937530192709086, "learning_rate": 4.9832107023411375e-06, "loss": 0.2524, "step": 20075 }, { "epoch": 0.79, "grad_norm": 6.879107953793692, "learning_rate": 4.98316889632107e-06, "loss": 0.2629, "step": 20100 }, { "epoch": 0.79, "grad_norm": 4.726138480598868, "learning_rate": 4.983127090301004e-06, "loss": 0.2541, "step": 20125 }, { "epoch": 0.79, "grad_norm": 8.90017454045631, "learning_rate": 4.9830852842809364e-06, "loss": 0.263, "step": 20150 }, { "epoch": 0.79, "grad_norm": 6.581872676278387, "learning_rate": 4.98304347826087e-06, "loss": 0.2568, "step": 20175 }, { "epoch": 0.79, "grad_norm": 4.54004763181376, "learning_rate": 4.9830016722408035e-06, "loss": 0.237, "step": 20200 }, { "epoch": 0.8, "grad_norm": 5.106351027498988, "learning_rate": 4.982959866220736e-06, "loss": 0.2521, "step": 20225 }, { "epoch": 0.8, "grad_norm": 6.218144145112456, "learning_rate": 4.98291806020067e-06, "loss": 0.2719, "step": 20250 }, { "epoch": 0.8, "grad_norm": 4.68621541939387, "learning_rate": 4.9828762541806025e-06, "loss": 0.2378, "step": 20275 }, { "epoch": 0.8, "grad_norm": 4.990952354340057, "learning_rate": 4.982834448160536e-06, "loss": 0.2557, "step": 20300 }, { "epoch": 0.8, "grad_norm": 5.0326266609134045, "learning_rate": 4.982792642140469e-06, "loss": 0.2745, "step": 20325 }, { "epoch": 0.8, "grad_norm": 5.628421286827086, "learning_rate": 4.982750836120402e-06, "loss": 0.2832, "step": 20350 }, { "epoch": 0.8, "grad_norm": 5.890967827336047, "learning_rate": 4.982710702341137e-06, "loss": 0.2428, "step": 20375 }, { "epoch": 0.8, "grad_norm": 5.757261412656723, "learning_rate": 4.982668896321071e-06, "loss": 0.2363, "step": 20400 }, { "epoch": 0.8, "grad_norm": 5.790150924630483, "learning_rate": 4.9826270903010035e-06, "loss": 0.2841, "step": 20425 }, { "epoch": 0.8, "grad_norm": 5.334436845925394, "learning_rate": 4.982585284280937e-06, "loss": 0.2335, "step": 20450 }, { "epoch": 0.81, "grad_norm": 6.496338521482485, "learning_rate": 4.98254347826087e-06, "loss": 0.256, "step": 20475 }, { "epoch": 0.81, "grad_norm": 5.1183227106736355, "learning_rate": 4.982501672240803e-06, "loss": 0.2316, "step": 20500 }, { "epoch": 0.81, "grad_norm": 5.6851818591062955, "learning_rate": 4.982459866220736e-06, "loss": 0.2858, "step": 20525 }, { "epoch": 0.81, "grad_norm": 6.243803882140677, "learning_rate": 4.9824180602006695e-06, "loss": 0.2509, "step": 20550 }, { "epoch": 0.81, "grad_norm": 5.557348957276566, "learning_rate": 4.982376254180602e-06, "loss": 0.255, "step": 20575 }, { "epoch": 0.81, "grad_norm": 7.057972306154081, "learning_rate": 4.982334448160536e-06, "loss": 0.2673, "step": 20600 }, { "epoch": 0.81, "grad_norm": 5.269049715317146, "learning_rate": 4.9822926421404685e-06, "loss": 0.2332, "step": 20625 }, { "epoch": 0.81, "grad_norm": 5.0641172747571215, "learning_rate": 4.982250836120402e-06, "loss": 0.2438, "step": 20650 }, { "epoch": 0.81, "grad_norm": 4.545638271569368, "learning_rate": 4.982209030100335e-06, "loss": 0.2507, "step": 20675 }, { "epoch": 0.81, "grad_norm": 5.833842229029364, "learning_rate": 4.982167224080268e-06, "loss": 0.2692, "step": 20700 }, { "epoch": 0.82, "grad_norm": 5.91131799728637, "learning_rate": 4.982125418060201e-06, "loss": 0.2708, "step": 20725 }, { "epoch": 0.82, "grad_norm": 6.81240765396023, "learning_rate": 4.982083612040134e-06, "loss": 0.2362, "step": 20750 }, { "epoch": 0.82, "grad_norm": 5.808809996905685, "learning_rate": 4.982041806020067e-06, "loss": 0.2398, "step": 20775 }, { "epoch": 0.82, "grad_norm": 6.063650018965221, "learning_rate": 4.982e-06, "loss": 0.2631, "step": 20800 }, { "epoch": 0.82, "grad_norm": 5.759033532600636, "learning_rate": 4.9819581939799334e-06, "loss": 0.2227, "step": 20825 }, { "epoch": 0.82, "grad_norm": 5.455666288438305, "learning_rate": 4.981916387959866e-06, "loss": 0.2445, "step": 20850 }, { "epoch": 0.82, "grad_norm": 6.701923172622271, "learning_rate": 4.9818745819398e-06, "loss": 0.2575, "step": 20875 }, { "epoch": 0.82, "grad_norm": 4.9618139565457255, "learning_rate": 4.981832775919732e-06, "loss": 0.2406, "step": 20900 }, { "epoch": 0.82, "grad_norm": 7.68671913016998, "learning_rate": 4.981790969899666e-06, "loss": 0.2665, "step": 20925 }, { "epoch": 0.82, "grad_norm": 4.722574500390105, "learning_rate": 4.981749163879599e-06, "loss": 0.2569, "step": 20950 }, { "epoch": 0.83, "grad_norm": 4.572541737429397, "learning_rate": 4.981707357859532e-06, "loss": 0.239, "step": 20975 }, { "epoch": 0.83, "grad_norm": 5.415209470257432, "learning_rate": 4.981665551839465e-06, "loss": 0.2451, "step": 21000 }, { "epoch": 0.83, "grad_norm": 6.020681059671357, "learning_rate": 4.9816237458193984e-06, "loss": 0.2469, "step": 21025 }, { "epoch": 0.83, "grad_norm": 6.046597623490536, "learning_rate": 4.981581939799331e-06, "loss": 0.2429, "step": 21050 }, { "epoch": 0.83, "grad_norm": 5.68155367027518, "learning_rate": 4.981540133779265e-06, "loss": 0.2215, "step": 21075 }, { "epoch": 0.83, "grad_norm": 6.289254570068965, "learning_rate": 4.981498327759197e-06, "loss": 0.2596, "step": 21100 }, { "epoch": 0.83, "grad_norm": 5.531907347250682, "learning_rate": 4.981456521739131e-06, "loss": 0.2724, "step": 21125 }, { "epoch": 0.83, "grad_norm": 3.70719685809795, "learning_rate": 4.981414715719064e-06, "loss": 0.246, "step": 21150 }, { "epoch": 0.83, "grad_norm": 5.033777182035272, "learning_rate": 4.981372909698997e-06, "loss": 0.2603, "step": 21175 }, { "epoch": 0.83, "grad_norm": 6.997263007017261, "learning_rate": 4.98133110367893e-06, "loss": 0.2549, "step": 21200 }, { "epoch": 0.84, "grad_norm": 7.397447058530203, "learning_rate": 4.981289297658863e-06, "loss": 0.2252, "step": 21225 }, { "epoch": 0.84, "grad_norm": 6.387609355487834, "learning_rate": 4.981247491638796e-06, "loss": 0.2424, "step": 21250 }, { "epoch": 0.84, "grad_norm": 5.86389992482405, "learning_rate": 4.981207357859532e-06, "loss": 0.2808, "step": 21275 }, { "epoch": 0.84, "grad_norm": 5.886969996279023, "learning_rate": 4.9811655518394655e-06, "loss": 0.2445, "step": 21300 }, { "epoch": 0.84, "grad_norm": 4.930646590051299, "learning_rate": 4.981123745819398e-06, "loss": 0.2385, "step": 21325 }, { "epoch": 0.84, "grad_norm": 6.658704856684161, "learning_rate": 4.981081939799332e-06, "loss": 0.2654, "step": 21350 }, { "epoch": 0.84, "grad_norm": 5.85826584313124, "learning_rate": 4.981040133779264e-06, "loss": 0.2787, "step": 21375 }, { "epoch": 0.84, "grad_norm": 6.2136211167108915, "learning_rate": 4.980998327759198e-06, "loss": 0.2496, "step": 21400 }, { "epoch": 0.84, "grad_norm": 6.292493595501454, "learning_rate": 4.980956521739131e-06, "loss": 0.2398, "step": 21425 }, { "epoch": 0.84, "grad_norm": 6.058955089554257, "learning_rate": 4.980914715719064e-06, "loss": 0.2399, "step": 21450 }, { "epoch": 0.84, "grad_norm": 5.676342839883485, "learning_rate": 4.980872909698997e-06, "loss": 0.2323, "step": 21475 }, { "epoch": 0.85, "grad_norm": 4.982168817113219, "learning_rate": 4.9808311036789305e-06, "loss": 0.2391, "step": 21500 }, { "epoch": 0.85, "grad_norm": 6.193462179348854, "learning_rate": 4.980789297658863e-06, "loss": 0.2386, "step": 21525 }, { "epoch": 0.85, "grad_norm": 5.014739296688169, "learning_rate": 4.980747491638797e-06, "loss": 0.2514, "step": 21550 }, { "epoch": 0.85, "grad_norm": 4.930490666345008, "learning_rate": 4.980705685618729e-06, "loss": 0.2643, "step": 21575 }, { "epoch": 0.85, "grad_norm": 6.295124932611308, "learning_rate": 4.980663879598663e-06, "loss": 0.2689, "step": 21600 }, { "epoch": 0.85, "grad_norm": 5.907807211766026, "learning_rate": 4.980622073578596e-06, "loss": 0.2589, "step": 21625 }, { "epoch": 0.85, "grad_norm": 6.178379073295326, "learning_rate": 4.980580267558529e-06, "loss": 0.2515, "step": 21650 }, { "epoch": 0.85, "grad_norm": 5.000008304241975, "learning_rate": 4.980538461538462e-06, "loss": 0.2544, "step": 21675 }, { "epoch": 0.85, "grad_norm": 5.7552188199845675, "learning_rate": 4.980496655518395e-06, "loss": 0.2474, "step": 21700 }, { "epoch": 0.85, "grad_norm": 6.291094061313578, "learning_rate": 4.980454849498328e-06, "loss": 0.2488, "step": 21725 }, { "epoch": 0.86, "grad_norm": 4.7158702845231, "learning_rate": 4.980413043478261e-06, "loss": 0.2377, "step": 21750 }, { "epoch": 0.86, "grad_norm": 4.371680856314009, "learning_rate": 4.980371237458194e-06, "loss": 0.2695, "step": 21775 }, { "epoch": 0.86, "grad_norm": 5.362994192905961, "learning_rate": 4.980329431438127e-06, "loss": 0.2622, "step": 21800 }, { "epoch": 0.86, "grad_norm": 6.382584959830635, "learning_rate": 4.980287625418061e-06, "loss": 0.2458, "step": 21825 }, { "epoch": 0.86, "grad_norm": 5.793487011383763, "learning_rate": 4.980245819397993e-06, "loss": 0.288, "step": 21850 }, { "epoch": 0.86, "grad_norm": 4.399312922304265, "learning_rate": 4.980204013377927e-06, "loss": 0.2615, "step": 21875 }, { "epoch": 0.86, "grad_norm": 6.081833307673779, "learning_rate": 4.9801622073578596e-06, "loss": 0.2694, "step": 21900 }, { "epoch": 0.86, "grad_norm": 4.915184300784432, "learning_rate": 4.980120401337793e-06, "loss": 0.2545, "step": 21925 }, { "epoch": 0.86, "grad_norm": 5.794109389200768, "learning_rate": 4.980078595317726e-06, "loss": 0.2422, "step": 21950 }, { "epoch": 0.86, "grad_norm": 6.638448141225639, "learning_rate": 4.980036789297659e-06, "loss": 0.2736, "step": 21975 }, { "epoch": 0.87, "grad_norm": 4.2006435060994844, "learning_rate": 4.979994983277592e-06, "loss": 0.2537, "step": 22000 }, { "epoch": 0.87, "grad_norm": 4.600066690029428, "learning_rate": 4.979953177257526e-06, "loss": 0.2654, "step": 22025 }, { "epoch": 0.87, "grad_norm": 5.687952294883638, "learning_rate": 4.979911371237458e-06, "loss": 0.2593, "step": 22050 }, { "epoch": 0.87, "grad_norm": 5.627401660974489, "learning_rate": 4.979869565217392e-06, "loss": 0.2711, "step": 22075 }, { "epoch": 0.87, "grad_norm": 5.065685866336755, "learning_rate": 4.9798277591973245e-06, "loss": 0.2387, "step": 22100 }, { "epoch": 0.87, "grad_norm": 4.682468578553179, "learning_rate": 4.979785953177258e-06, "loss": 0.2351, "step": 22125 }, { "epoch": 0.87, "grad_norm": 5.893827612677344, "learning_rate": 4.979744147157191e-06, "loss": 0.2501, "step": 22150 }, { "epoch": 0.87, "grad_norm": 5.300974420339823, "learning_rate": 4.979702341137124e-06, "loss": 0.2292, "step": 22175 }, { "epoch": 0.87, "grad_norm": 6.750330015989807, "learning_rate": 4.979660535117057e-06, "loss": 0.2534, "step": 22200 }, { "epoch": 0.87, "grad_norm": 4.921714939272224, "learning_rate": 4.9796187290969906e-06, "loss": 0.2378, "step": 22225 }, { "epoch": 0.88, "grad_norm": 6.104235634143888, "learning_rate": 4.979576923076923e-06, "loss": 0.239, "step": 22250 }, { "epoch": 0.88, "grad_norm": 6.365003485750156, "learning_rate": 4.979535117056857e-06, "loss": 0.2401, "step": 22275 }, { "epoch": 0.88, "grad_norm": 5.8943944369540064, "learning_rate": 4.9794933110367895e-06, "loss": 0.2703, "step": 22300 }, { "epoch": 0.88, "grad_norm": 5.740334114124547, "learning_rate": 4.979451505016723e-06, "loss": 0.2448, "step": 22325 }, { "epoch": 0.88, "grad_norm": 6.26645969958187, "learning_rate": 4.979409698996656e-06, "loss": 0.237, "step": 22350 }, { "epoch": 0.88, "grad_norm": 5.188296175194914, "learning_rate": 4.979367892976589e-06, "loss": 0.2439, "step": 22375 }, { "epoch": 0.88, "grad_norm": 5.28258281943557, "learning_rate": 4.979326086956522e-06, "loss": 0.2319, "step": 22400 }, { "epoch": 0.88, "grad_norm": 4.876641118702411, "learning_rate": 4.979284280936455e-06, "loss": 0.2256, "step": 22425 }, { "epoch": 0.88, "grad_norm": 5.9872075168697, "learning_rate": 4.979242474916388e-06, "loss": 0.2401, "step": 22450 }, { "epoch": 0.88, "grad_norm": 5.709817054749593, "learning_rate": 4.979200668896321e-06, "loss": 0.2318, "step": 22475 }, { "epoch": 0.89, "grad_norm": 5.821509746602487, "learning_rate": 4.9791588628762545e-06, "loss": 0.2556, "step": 22500 }, { "epoch": 0.89, "grad_norm": 4.68934415618322, "learning_rate": 4.979117056856187e-06, "loss": 0.2558, "step": 22525 }, { "epoch": 0.89, "grad_norm": 5.509947218331295, "learning_rate": 4.979075250836121e-06, "loss": 0.2365, "step": 22550 }, { "epoch": 0.89, "grad_norm": 4.58010564782228, "learning_rate": 4.9790334448160534e-06, "loss": 0.2585, "step": 22575 }, { "epoch": 0.89, "grad_norm": 5.350109880497084, "learning_rate": 4.978991638795987e-06, "loss": 0.2496, "step": 22600 }, { "epoch": 0.89, "grad_norm": 5.52596483622084, "learning_rate": 4.97894983277592e-06, "loss": 0.2513, "step": 22625 }, { "epoch": 0.89, "grad_norm": 4.5425308104361335, "learning_rate": 4.978908026755853e-06, "loss": 0.2317, "step": 22650 }, { "epoch": 0.89, "grad_norm": 5.042990505291717, "learning_rate": 4.978866220735786e-06, "loss": 0.2425, "step": 22675 }, { "epoch": 0.89, "grad_norm": 5.28155864508552, "learning_rate": 4.9788244147157195e-06, "loss": 0.2433, "step": 22700 }, { "epoch": 0.89, "grad_norm": 5.05219957106238, "learning_rate": 4.978782608695652e-06, "loss": 0.2582, "step": 22725 }, { "epoch": 0.89, "grad_norm": 6.3750393384628925, "learning_rate": 4.978740802675586e-06, "loss": 0.24, "step": 22750 }, { "epoch": 0.9, "grad_norm": 4.981026672978073, "learning_rate": 4.978698996655518e-06, "loss": 0.2564, "step": 22775 }, { "epoch": 0.9, "grad_norm": 4.8161776421109055, "learning_rate": 4.978657190635452e-06, "loss": 0.239, "step": 22800 }, { "epoch": 0.9, "grad_norm": 7.376213905893997, "learning_rate": 4.978615384615385e-06, "loss": 0.2611, "step": 22825 }, { "epoch": 0.9, "grad_norm": 5.15776727013437, "learning_rate": 4.978573578595318e-06, "loss": 0.2576, "step": 22850 }, { "epoch": 0.9, "grad_norm": 5.04185750311223, "learning_rate": 4.978531772575252e-06, "loss": 0.2482, "step": 22875 }, { "epoch": 0.9, "grad_norm": 5.950731538074712, "learning_rate": 4.9784899665551844e-06, "loss": 0.269, "step": 22900 }, { "epoch": 0.9, "grad_norm": 6.205593383648058, "learning_rate": 4.978448160535118e-06, "loss": 0.2401, "step": 22925 }, { "epoch": 0.9, "grad_norm": 5.548735869447201, "learning_rate": 4.978406354515051e-06, "loss": 0.2462, "step": 22950 }, { "epoch": 0.9, "grad_norm": 5.329571742913044, "learning_rate": 4.978364548494984e-06, "loss": 0.2636, "step": 22975 }, { "epoch": 0.9, "grad_norm": 5.536247610819822, "learning_rate": 4.978322742474917e-06, "loss": 0.2512, "step": 23000 }, { "epoch": 0.91, "grad_norm": 5.573520221822453, "learning_rate": 4.9782809364548505e-06, "loss": 0.2551, "step": 23025 }, { "epoch": 0.91, "grad_norm": 5.059324762232372, "learning_rate": 4.978239130434783e-06, "loss": 0.2677, "step": 23050 }, { "epoch": 0.91, "grad_norm": 6.4312911338529535, "learning_rate": 4.978197324414717e-06, "loss": 0.2647, "step": 23075 }, { "epoch": 0.91, "grad_norm": 5.905908003456633, "learning_rate": 4.978155518394649e-06, "loss": 0.241, "step": 23100 }, { "epoch": 0.91, "grad_norm": 6.305362513811501, "learning_rate": 4.978113712374582e-06, "loss": 0.2759, "step": 23125 }, { "epoch": 0.91, "grad_norm": 6.334507557305194, "learning_rate": 4.978071906354515e-06, "loss": 0.2471, "step": 23150 }, { "epoch": 0.91, "grad_norm": 5.167193712946236, "learning_rate": 4.978030100334448e-06, "loss": 0.2454, "step": 23175 }, { "epoch": 0.91, "grad_norm": 5.575127667668046, "learning_rate": 4.977988294314381e-06, "loss": 0.2166, "step": 23200 }, { "epoch": 0.91, "grad_norm": 5.829621265497541, "learning_rate": 4.977948160535118e-06, "loss": 0.2363, "step": 23225 }, { "epoch": 0.91, "grad_norm": 5.561630435288059, "learning_rate": 4.9779063545150504e-06, "loss": 0.2368, "step": 23250 }, { "epoch": 0.92, "grad_norm": 4.602771537076456, "learning_rate": 4.977864548494984e-06, "loss": 0.264, "step": 23275 }, { "epoch": 0.92, "grad_norm": 4.8921357782406565, "learning_rate": 4.977822742474917e-06, "loss": 0.2425, "step": 23300 }, { "epoch": 0.92, "grad_norm": 6.761815993674995, "learning_rate": 4.97778093645485e-06, "loss": 0.2628, "step": 23325 }, { "epoch": 0.92, "grad_norm": 4.789432658329115, "learning_rate": 4.977739130434783e-06, "loss": 0.2415, "step": 23350 }, { "epoch": 0.92, "grad_norm": 5.212070910404648, "learning_rate": 4.977697324414716e-06, "loss": 0.2606, "step": 23375 }, { "epoch": 0.92, "grad_norm": 5.645169198733312, "learning_rate": 4.977655518394649e-06, "loss": 0.2295, "step": 23400 }, { "epoch": 0.92, "grad_norm": 5.941660028505237, "learning_rate": 4.977613712374582e-06, "loss": 0.259, "step": 23425 }, { "epoch": 0.92, "grad_norm": 6.006340050777933, "learning_rate": 4.977571906354515e-06, "loss": 0.2489, "step": 23450 }, { "epoch": 0.92, "grad_norm": 5.444718325419201, "learning_rate": 4.977530100334448e-06, "loss": 0.2252, "step": 23475 }, { "epoch": 0.92, "grad_norm": 5.315731137511242, "learning_rate": 4.977488294314382e-06, "loss": 0.2444, "step": 23500 }, { "epoch": 0.93, "grad_norm": 5.147536052111137, "learning_rate": 4.977446488294314e-06, "loss": 0.253, "step": 23525 }, { "epoch": 0.93, "grad_norm": 4.637894947771309, "learning_rate": 4.977404682274248e-06, "loss": 0.2411, "step": 23550 }, { "epoch": 0.93, "grad_norm": 7.3547145173015, "learning_rate": 4.977362876254181e-06, "loss": 0.249, "step": 23575 }, { "epoch": 0.93, "grad_norm": 6.191389830163696, "learning_rate": 4.977321070234114e-06, "loss": 0.2977, "step": 23600 }, { "epoch": 0.93, "grad_norm": 4.747541667430566, "learning_rate": 4.977279264214047e-06, "loss": 0.2449, "step": 23625 }, { "epoch": 0.93, "grad_norm": 4.520805102077198, "learning_rate": 4.97723745819398e-06, "loss": 0.2308, "step": 23650 }, { "epoch": 0.93, "grad_norm": 5.417279655484847, "learning_rate": 4.977195652173913e-06, "loss": 0.2282, "step": 23675 }, { "epoch": 0.93, "grad_norm": 5.814135551224237, "learning_rate": 4.977153846153847e-06, "loss": 0.2407, "step": 23700 }, { "epoch": 0.93, "grad_norm": 5.799363124129896, "learning_rate": 4.977112040133779e-06, "loss": 0.2464, "step": 23725 }, { "epoch": 0.93, "grad_norm": 5.926212906223406, "learning_rate": 4.977070234113713e-06, "loss": 0.2543, "step": 23750 }, { "epoch": 0.94, "grad_norm": 4.757663142452678, "learning_rate": 4.977028428093646e-06, "loss": 0.2452, "step": 23775 }, { "epoch": 0.94, "grad_norm": 6.6489188278172895, "learning_rate": 4.976986622073579e-06, "loss": 0.2517, "step": 23800 }, { "epoch": 0.94, "grad_norm": 5.577979099426105, "learning_rate": 4.976944816053512e-06, "loss": 0.2605, "step": 23825 }, { "epoch": 0.94, "grad_norm": 6.9466236367411005, "learning_rate": 4.976903010033445e-06, "loss": 0.2655, "step": 23850 }, { "epoch": 0.94, "grad_norm": 5.7414326777762685, "learning_rate": 4.976861204013378e-06, "loss": 0.2304, "step": 23875 }, { "epoch": 0.94, "grad_norm": 5.522499469061028, "learning_rate": 4.976819397993312e-06, "loss": 0.2454, "step": 23900 }, { "epoch": 0.94, "grad_norm": 5.343226074167374, "learning_rate": 4.976777591973244e-06, "loss": 0.2555, "step": 23925 }, { "epoch": 0.94, "grad_norm": 6.908837038372152, "learning_rate": 4.976735785953178e-06, "loss": 0.2479, "step": 23950 }, { "epoch": 0.94, "grad_norm": 5.9308077824937016, "learning_rate": 4.976693979933111e-06, "loss": 0.2571, "step": 23975 }, { "epoch": 0.94, "grad_norm": 6.187889109702333, "learning_rate": 4.976652173913044e-06, "loss": 0.2705, "step": 24000 }, { "epoch": 0.95, "grad_norm": 5.591103804137682, "learning_rate": 4.976610367892978e-06, "loss": 0.2448, "step": 24025 }, { "epoch": 0.95, "grad_norm": 4.396768803047713, "learning_rate": 4.97656856187291e-06, "loss": 0.2243, "step": 24050 }, { "epoch": 0.95, "grad_norm": 4.7909268110643275, "learning_rate": 4.976526755852843e-06, "loss": 0.2332, "step": 24075 }, { "epoch": 0.95, "grad_norm": 3.7576206413020232, "learning_rate": 4.976484949832776e-06, "loss": 0.2549, "step": 24100 }, { "epoch": 0.95, "grad_norm": 5.4611210915597415, "learning_rate": 4.976443143812709e-06, "loss": 0.2458, "step": 24125 }, { "epoch": 0.95, "grad_norm": 6.089932740094683, "learning_rate": 4.976401337792642e-06, "loss": 0.2503, "step": 24150 }, { "epoch": 0.95, "grad_norm": 4.268253661077563, "learning_rate": 4.9763595317725755e-06, "loss": 0.225, "step": 24175 }, { "epoch": 0.95, "grad_norm": 5.204555335361072, "learning_rate": 4.976317725752508e-06, "loss": 0.2713, "step": 24200 }, { "epoch": 0.95, "grad_norm": 6.021816391599869, "learning_rate": 4.976275919732442e-06, "loss": 0.2336, "step": 24225 }, { "epoch": 0.95, "grad_norm": 4.8455887000789835, "learning_rate": 4.9762341137123745e-06, "loss": 0.2279, "step": 24250 }, { "epoch": 0.95, "grad_norm": 7.225031289810486, "learning_rate": 4.976192307692308e-06, "loss": 0.2275, "step": 24275 }, { "epoch": 0.96, "grad_norm": 5.159430367632941, "learning_rate": 4.976150501672241e-06, "loss": 0.2492, "step": 24300 }, { "epoch": 0.96, "grad_norm": 4.127979739902747, "learning_rate": 4.976108695652174e-06, "loss": 0.2336, "step": 24325 }, { "epoch": 0.96, "grad_norm": 6.238071839251816, "learning_rate": 4.976066889632107e-06, "loss": 0.2801, "step": 24350 }, { "epoch": 0.96, "grad_norm": 4.157082993930431, "learning_rate": 4.9760250836120405e-06, "loss": 0.2307, "step": 24375 }, { "epoch": 0.96, "grad_norm": 5.751821236126511, "learning_rate": 4.975983277591974e-06, "loss": 0.2466, "step": 24400 }, { "epoch": 0.96, "grad_norm": 5.723512280716872, "learning_rate": 4.975941471571907e-06, "loss": 0.2533, "step": 24425 }, { "epoch": 0.96, "grad_norm": 6.529999483690673, "learning_rate": 4.97589966555184e-06, "loss": 0.2305, "step": 24450 }, { "epoch": 0.96, "grad_norm": 4.640020922858098, "learning_rate": 4.975857859531773e-06, "loss": 0.24, "step": 24475 }, { "epoch": 0.96, "grad_norm": 5.647549375619507, "learning_rate": 4.9758160535117065e-06, "loss": 0.2444, "step": 24500 }, { "epoch": 0.96, "grad_norm": 4.391480590113418, "learning_rate": 4.975774247491639e-06, "loss": 0.2585, "step": 24525 }, { "epoch": 0.97, "grad_norm": 7.489122500542812, "learning_rate": 4.975732441471573e-06, "loss": 0.2475, "step": 24550 }, { "epoch": 0.97, "grad_norm": 7.499688320980486, "learning_rate": 4.9756906354515055e-06, "loss": 0.2402, "step": 24575 }, { "epoch": 0.97, "grad_norm": 4.331955267278932, "learning_rate": 4.975648829431439e-06, "loss": 0.2444, "step": 24600 }, { "epoch": 0.97, "grad_norm": 7.408758125870933, "learning_rate": 4.975607023411372e-06, "loss": 0.227, "step": 24625 }, { "epoch": 0.97, "grad_norm": 4.756801869357502, "learning_rate": 4.975565217391305e-06, "loss": 0.2472, "step": 24650 }, { "epoch": 0.97, "grad_norm": 5.472101388539881, "learning_rate": 4.975523411371238e-06, "loss": 0.2178, "step": 24675 }, { "epoch": 0.97, "grad_norm": 7.244728796693067, "learning_rate": 4.9754816053511715e-06, "loss": 0.2409, "step": 24700 }, { "epoch": 0.97, "grad_norm": 7.422212395250679, "learning_rate": 4.975439799331104e-06, "loss": 0.2553, "step": 24725 }, { "epoch": 0.97, "grad_norm": 6.733100754021741, "learning_rate": 4.975397993311038e-06, "loss": 0.2423, "step": 24750 }, { "epoch": 0.97, "grad_norm": 5.907380286889155, "learning_rate": 4.97535618729097e-06, "loss": 0.246, "step": 24775 }, { "epoch": 0.98, "grad_norm": 5.336386993051637, "learning_rate": 4.975314381270903e-06, "loss": 0.2437, "step": 24800 }, { "epoch": 0.98, "grad_norm": 6.3744020887867645, "learning_rate": 4.975272575250837e-06, "loss": 0.2537, "step": 24825 }, { "epoch": 0.98, "grad_norm": 5.130606979870505, "learning_rate": 4.975230769230769e-06, "loss": 0.2185, "step": 24850 }, { "epoch": 0.98, "grad_norm": 6.393159176636358, "learning_rate": 4.975188963210703e-06, "loss": 0.2447, "step": 24875 }, { "epoch": 0.98, "grad_norm": 4.37010512506881, "learning_rate": 4.975147157190636e-06, "loss": 0.2537, "step": 24900 }, { "epoch": 0.98, "grad_norm": 5.781150540673855, "learning_rate": 4.975105351170569e-06, "loss": 0.2322, "step": 24925 }, { "epoch": 0.98, "grad_norm": 4.670847613893096, "learning_rate": 4.975063545150502e-06, "loss": 0.2464, "step": 24950 }, { "epoch": 0.98, "grad_norm": 6.727990256628477, "learning_rate": 4.9750217391304354e-06, "loss": 0.2581, "step": 24975 }, { "epoch": 0.98, "grad_norm": 4.544263592893811, "learning_rate": 4.974979933110368e-06, "loss": 0.2628, "step": 25000 }, { "epoch": 0.98, "grad_norm": 5.69470940676318, "learning_rate": 4.974938127090302e-06, "loss": 0.2223, "step": 25025 }, { "epoch": 0.99, "grad_norm": 5.792583847102043, "learning_rate": 4.974896321070234e-06, "loss": 0.2343, "step": 25050 }, { "epoch": 0.99, "grad_norm": 5.870490355918843, "learning_rate": 4.974854515050168e-06, "loss": 0.2473, "step": 25075 }, { "epoch": 0.99, "grad_norm": 6.202473423301913, "learning_rate": 4.974812709030101e-06, "loss": 0.2507, "step": 25100 }, { "epoch": 0.99, "grad_norm": 5.7239036684055895, "learning_rate": 4.974770903010034e-06, "loss": 0.2539, "step": 25125 }, { "epoch": 0.99, "grad_norm": 5.222922132018913, "learning_rate": 4.974729096989967e-06, "loss": 0.2239, "step": 25150 }, { "epoch": 0.99, "grad_norm": 5.957670491347016, "learning_rate": 4.9746872909699e-06, "loss": 0.2482, "step": 25175 }, { "epoch": 0.99, "grad_norm": 4.8279149756498105, "learning_rate": 4.974645484949833e-06, "loss": 0.241, "step": 25200 }, { "epoch": 0.99, "grad_norm": 5.200294070704066, "learning_rate": 4.974605351170569e-06, "loss": 0.242, "step": 25225 }, { "epoch": 0.99, "grad_norm": 4.670126129827119, "learning_rate": 4.974563545150502e-06, "loss": 0.2406, "step": 25250 }, { "epoch": 0.99, "grad_norm": 5.544898567700344, "learning_rate": 4.974521739130435e-06, "loss": 0.2338, "step": 25275 }, { "epoch": 1.0, "grad_norm": 4.806206312623487, "learning_rate": 4.974479933110368e-06, "loss": 0.2265, "step": 25300 }, { "epoch": 1.0, "grad_norm": 5.53793647458314, "learning_rate": 4.9744381270903014e-06, "loss": 0.2345, "step": 25325 }, { "epoch": 1.0, "grad_norm": 5.608502420190892, "learning_rate": 4.974396321070234e-06, "loss": 0.2385, "step": 25350 }, { "epoch": 1.0, "grad_norm": 5.1286787349789815, "learning_rate": 4.974354515050168e-06, "loss": 0.2453, "step": 25375 }, { "epoch": 1.0, "grad_norm": 5.469570857307241, "learning_rate": 4.9743127090301e-06, "loss": 0.2294, "step": 25400 }, { "epoch": 1.0, "grad_norm": 1.3534162143803563, "learning_rate": 4.974270903010034e-06, "loss": 0.2361, "step": 25425 }, { "epoch": 1.0, "grad_norm": 1.706718780670229, "learning_rate": 4.974229096989967e-06, "loss": 0.2079, "step": 25450 }, { "epoch": 1.0, "grad_norm": 1.4059544274203903, "learning_rate": 4.9741872909699e-06, "loss": 0.2031, "step": 25475 }, { "epoch": 1.0, "grad_norm": 1.7037901786451766, "learning_rate": 4.974145484949834e-06, "loss": 0.2252, "step": 25500 }, { "epoch": 1.0, "grad_norm": 1.3558198308855107, "learning_rate": 4.974103678929766e-06, "loss": 0.2154, "step": 25525 }, { "epoch": 1.01, "grad_norm": 1.6682224121680143, "learning_rate": 4.9740618729097e-06, "loss": 0.2129, "step": 25550 }, { "epoch": 1.01, "grad_norm": 1.4577637750260048, "learning_rate": 4.974020066889633e-06, "loss": 0.2024, "step": 25575 }, { "epoch": 1.01, "grad_norm": 1.5087352378433831, "learning_rate": 4.973978260869566e-06, "loss": 0.2109, "step": 25600 }, { "epoch": 1.01, "grad_norm": 1.6402724021534647, "learning_rate": 4.973936454849499e-06, "loss": 0.212, "step": 25625 }, { "epoch": 1.01, "grad_norm": 1.9264263538960484, "learning_rate": 4.9738946488294324e-06, "loss": 0.2143, "step": 25650 }, { "epoch": 1.01, "grad_norm": 1.9174315090045257, "learning_rate": 4.973852842809365e-06, "loss": 0.2117, "step": 25675 }, { "epoch": 1.01, "grad_norm": 1.8429762901411832, "learning_rate": 4.973811036789299e-06, "loss": 0.2059, "step": 25700 }, { "epoch": 1.01, "grad_norm": 1.5033416778790167, "learning_rate": 4.9737692307692305e-06, "loss": 0.182, "step": 25725 }, { "epoch": 1.01, "grad_norm": 1.330321894521951, "learning_rate": 4.973727424749164e-06, "loss": 0.1881, "step": 25750 }, { "epoch": 1.01, "grad_norm": 1.8292089071205324, "learning_rate": 4.973685618729097e-06, "loss": 0.1975, "step": 25775 }, { "epoch": 1.01, "grad_norm": 1.2598690654621236, "learning_rate": 4.97364381270903e-06, "loss": 0.2053, "step": 25800 }, { "epoch": 1.02, "grad_norm": 1.435229607084148, "learning_rate": 4.973602006688963e-06, "loss": 0.2169, "step": 25825 }, { "epoch": 1.02, "grad_norm": 2.1185880545936135, "learning_rate": 4.973560200668897e-06, "loss": 0.1958, "step": 25850 }, { "epoch": 1.02, "grad_norm": 1.3672188771049028, "learning_rate": 4.973518394648829e-06, "loss": 0.2104, "step": 25875 }, { "epoch": 1.02, "grad_norm": 1.863357164049792, "learning_rate": 4.973476588628763e-06, "loss": 0.1857, "step": 25900 }, { "epoch": 1.02, "grad_norm": 1.9990414028836165, "learning_rate": 4.973434782608696e-06, "loss": 0.1921, "step": 25925 }, { "epoch": 1.02, "grad_norm": 1.3154262807639836, "learning_rate": 4.973392976588629e-06, "loss": 0.2266, "step": 25950 }, { "epoch": 1.02, "grad_norm": 1.3950612420595643, "learning_rate": 4.973351170568563e-06, "loss": 0.2064, "step": 25975 }, { "epoch": 1.02, "grad_norm": 1.408288078068823, "learning_rate": 4.973309364548495e-06, "loss": 0.2071, "step": 26000 }, { "epoch": 1.02, "grad_norm": 1.9525039250801453, "learning_rate": 4.973267558528429e-06, "loss": 0.2118, "step": 26025 }, { "epoch": 1.02, "grad_norm": 1.8278710821331015, "learning_rate": 4.9732257525083616e-06, "loss": 0.2262, "step": 26050 }, { "epoch": 1.03, "grad_norm": 1.6148367474628422, "learning_rate": 4.973183946488295e-06, "loss": 0.1902, "step": 26075 }, { "epoch": 1.03, "grad_norm": 1.5038758535892578, "learning_rate": 4.973142140468228e-06, "loss": 0.2051, "step": 26100 }, { "epoch": 1.03, "grad_norm": 1.55344960183899, "learning_rate": 4.973100334448161e-06, "loss": 0.2026, "step": 26125 }, { "epoch": 1.03, "grad_norm": 2.1894008457226986, "learning_rate": 4.973058528428094e-06, "loss": 0.2085, "step": 26150 }, { "epoch": 1.03, "grad_norm": 1.585075493380135, "learning_rate": 4.973016722408028e-06, "loss": 0.2036, "step": 26175 }, { "epoch": 1.03, "grad_norm": 1.8233869368121183, "learning_rate": 4.97297491638796e-06, "loss": 0.2235, "step": 26200 }, { "epoch": 1.03, "grad_norm": 1.8237982393202128, "learning_rate": 4.972934782608696e-06, "loss": 0.2225, "step": 26225 }, { "epoch": 1.03, "grad_norm": 1.870096249638426, "learning_rate": 4.972892976588629e-06, "loss": 0.2277, "step": 26250 }, { "epoch": 1.03, "grad_norm": 2.084326368520519, "learning_rate": 4.972851170568562e-06, "loss": 0.2166, "step": 26275 }, { "epoch": 1.03, "grad_norm": 1.786309173531602, "learning_rate": 4.972809364548495e-06, "loss": 0.1952, "step": 26300 }, { "epoch": 1.04, "grad_norm": 2.118377964073037, "learning_rate": 4.972767558528429e-06, "loss": 0.2035, "step": 26325 }, { "epoch": 1.04, "grad_norm": 1.4408933539946762, "learning_rate": 4.972725752508361e-06, "loss": 0.2101, "step": 26350 }, { "epoch": 1.04, "grad_norm": 1.5247018069177714, "learning_rate": 4.972683946488295e-06, "loss": 0.2146, "step": 26375 }, { "epoch": 1.04, "grad_norm": 1.4099242258025098, "learning_rate": 4.9726421404682275e-06, "loss": 0.2125, "step": 26400 }, { "epoch": 1.04, "grad_norm": 1.385534734677316, "learning_rate": 4.972600334448161e-06, "loss": 0.2031, "step": 26425 }, { "epoch": 1.04, "grad_norm": 1.3518252038997889, "learning_rate": 4.972558528428094e-06, "loss": 0.2054, "step": 26450 }, { "epoch": 1.04, "grad_norm": 1.7923556960029041, "learning_rate": 4.972516722408027e-06, "loss": 0.2048, "step": 26475 }, { "epoch": 1.04, "grad_norm": 1.6394896847363243, "learning_rate": 4.97247491638796e-06, "loss": 0.2172, "step": 26500 }, { "epoch": 1.04, "grad_norm": 2.9050366746717975, "learning_rate": 4.972433110367894e-06, "loss": 0.2072, "step": 26525 }, { "epoch": 1.04, "grad_norm": 1.4611510590944903, "learning_rate": 4.972391304347826e-06, "loss": 0.2026, "step": 26550 }, { "epoch": 1.05, "grad_norm": 1.757163035064764, "learning_rate": 4.97234949832776e-06, "loss": 0.2169, "step": 26575 }, { "epoch": 1.05, "grad_norm": 1.2821175785153787, "learning_rate": 4.9723076923076925e-06, "loss": 0.2181, "step": 26600 }, { "epoch": 1.05, "grad_norm": 1.4184262153912761, "learning_rate": 4.972265886287626e-06, "loss": 0.1958, "step": 26625 }, { "epoch": 1.05, "grad_norm": 1.7602753726816016, "learning_rate": 4.97222408026756e-06, "loss": 0.2009, "step": 26650 }, { "epoch": 1.05, "grad_norm": 2.5974760210540015, "learning_rate": 4.9721822742474915e-06, "loss": 0.2243, "step": 26675 }, { "epoch": 1.05, "grad_norm": 1.645642258789146, "learning_rate": 4.972140468227425e-06, "loss": 0.2038, "step": 26700 }, { "epoch": 1.05, "grad_norm": 1.4401284677266035, "learning_rate": 4.972098662207358e-06, "loss": 0.1898, "step": 26725 }, { "epoch": 1.05, "grad_norm": 1.430617922584249, "learning_rate": 4.972056856187291e-06, "loss": 0.2111, "step": 26750 }, { "epoch": 1.05, "grad_norm": 1.7904074848447293, "learning_rate": 4.972015050167224e-06, "loss": 0.2129, "step": 26775 }, { "epoch": 1.05, "grad_norm": 2.406346980994871, "learning_rate": 4.9719732441471575e-06, "loss": 0.2045, "step": 26800 }, { "epoch": 1.06, "grad_norm": 2.2583526864827905, "learning_rate": 4.97193143812709e-06, "loss": 0.2266, "step": 26825 }, { "epoch": 1.06, "grad_norm": 1.7655942648716196, "learning_rate": 4.971889632107024e-06, "loss": 0.1951, "step": 26850 }, { "epoch": 1.06, "grad_norm": 1.467109180429437, "learning_rate": 4.9718478260869564e-06, "loss": 0.2284, "step": 26875 }, { "epoch": 1.06, "grad_norm": 1.7516265119310985, "learning_rate": 4.97180602006689e-06, "loss": 0.2234, "step": 26900 }, { "epoch": 1.06, "grad_norm": 1.5583524171970238, "learning_rate": 4.971764214046823e-06, "loss": 0.2205, "step": 26925 }, { "epoch": 1.06, "grad_norm": 1.5551307670089498, "learning_rate": 4.971722408026756e-06, "loss": 0.2013, "step": 26950 }, { "epoch": 1.06, "grad_norm": 1.7858242769151977, "learning_rate": 4.971680602006689e-06, "loss": 0.2037, "step": 26975 }, { "epoch": 1.06, "grad_norm": 1.6377397935158822, "learning_rate": 4.9716387959866225e-06, "loss": 0.2002, "step": 27000 }, { "epoch": 1.06, "grad_norm": 1.9240347144018508, "learning_rate": 4.971596989966555e-06, "loss": 0.2101, "step": 27025 }, { "epoch": 1.06, "grad_norm": 1.9628276445796573, "learning_rate": 4.971555183946489e-06, "loss": 0.2132, "step": 27050 }, { "epoch": 1.07, "grad_norm": 0.9744874761939027, "learning_rate": 4.971513377926422e-06, "loss": 0.2101, "step": 27075 }, { "epoch": 1.07, "grad_norm": 1.6399308156848618, "learning_rate": 4.971471571906355e-06, "loss": 0.2181, "step": 27100 }, { "epoch": 1.07, "grad_norm": 1.1026445818397668, "learning_rate": 4.9714297658862885e-06, "loss": 0.197, "step": 27125 }, { "epoch": 1.07, "grad_norm": 2.2558271888800925, "learning_rate": 4.971387959866221e-06, "loss": 0.2128, "step": 27150 }, { "epoch": 1.07, "grad_norm": 1.8603882142931765, "learning_rate": 4.971346153846155e-06, "loss": 0.1947, "step": 27175 }, { "epoch": 1.07, "grad_norm": 1.7503812536338823, "learning_rate": 4.9713043478260875e-06, "loss": 0.1932, "step": 27200 }, { "epoch": 1.07, "grad_norm": 1.567055034127128, "learning_rate": 4.971264214046823e-06, "loss": 0.1959, "step": 27225 }, { "epoch": 1.07, "grad_norm": 2.133487976541747, "learning_rate": 4.971222408026756e-06, "loss": 0.2218, "step": 27250 }, { "epoch": 1.07, "grad_norm": 1.7226015136938018, "learning_rate": 4.9711806020066895e-06, "loss": 0.2246, "step": 27275 }, { "epoch": 1.07, "grad_norm": 1.5324982024611993, "learning_rate": 4.971138795986622e-06, "loss": 0.2249, "step": 27300 }, { "epoch": 1.07, "grad_norm": 1.2666638439290532, "learning_rate": 4.971096989966556e-06, "loss": 0.2023, "step": 27325 }, { "epoch": 1.08, "grad_norm": 1.7936378450293402, "learning_rate": 4.9710551839464885e-06, "loss": 0.2209, "step": 27350 }, { "epoch": 1.08, "grad_norm": 1.4685172845320271, "learning_rate": 4.971013377926422e-06, "loss": 0.212, "step": 27375 }, { "epoch": 1.08, "grad_norm": 1.3475774524686015, "learning_rate": 4.970971571906355e-06, "loss": 0.2038, "step": 27400 }, { "epoch": 1.08, "grad_norm": 1.1356769724282703, "learning_rate": 4.970929765886288e-06, "loss": 0.2052, "step": 27425 }, { "epoch": 1.08, "grad_norm": 2.3461405690608945, "learning_rate": 4.970887959866221e-06, "loss": 0.2077, "step": 27450 }, { "epoch": 1.08, "grad_norm": 1.2787743115282497, "learning_rate": 4.9708461538461545e-06, "loss": 0.1933, "step": 27475 }, { "epoch": 1.08, "grad_norm": 1.710480020197042, "learning_rate": 4.970804347826087e-06, "loss": 0.1945, "step": 27500 }, { "epoch": 1.08, "grad_norm": 1.74469357398459, "learning_rate": 4.970762541806021e-06, "loss": 0.2212, "step": 27525 }, { "epoch": 1.08, "grad_norm": 1.553720300641038, "learning_rate": 4.9707207357859535e-06, "loss": 0.2071, "step": 27550 }, { "epoch": 1.08, "grad_norm": 1.5925560178851168, "learning_rate": 4.970678929765887e-06, "loss": 0.2082, "step": 27575 }, { "epoch": 1.09, "grad_norm": 2.1786027786262885, "learning_rate": 4.97063712374582e-06, "loss": 0.2185, "step": 27600 }, { "epoch": 1.09, "grad_norm": 1.7290929692337984, "learning_rate": 4.970595317725752e-06, "loss": 0.1983, "step": 27625 }, { "epoch": 1.09, "grad_norm": 1.9473623643856743, "learning_rate": 4.970553511705686e-06, "loss": 0.1961, "step": 27650 }, { "epoch": 1.09, "grad_norm": 1.4076384394311243, "learning_rate": 4.970511705685619e-06, "loss": 0.197, "step": 27675 }, { "epoch": 1.09, "grad_norm": 1.9993400148837797, "learning_rate": 4.970469899665552e-06, "loss": 0.1954, "step": 27700 }, { "epoch": 1.09, "grad_norm": 1.271427684128234, "learning_rate": 4.970428093645485e-06, "loss": 0.2048, "step": 27725 }, { "epoch": 1.09, "grad_norm": 2.591987460804486, "learning_rate": 4.9703862876254184e-06, "loss": 0.1951, "step": 27750 }, { "epoch": 1.09, "grad_norm": 1.5791865767097701, "learning_rate": 4.970344481605351e-06, "loss": 0.2057, "step": 27775 }, { "epoch": 1.09, "grad_norm": 2.1896274986314004, "learning_rate": 4.970302675585285e-06, "loss": 0.2098, "step": 27800 }, { "epoch": 1.09, "grad_norm": 1.6903447291808138, "learning_rate": 4.970260869565217e-06, "loss": 0.1987, "step": 27825 }, { "epoch": 1.1, "grad_norm": 1.490242793429607, "learning_rate": 4.970219063545151e-06, "loss": 0.1942, "step": 27850 }, { "epoch": 1.1, "grad_norm": 1.2304358371703357, "learning_rate": 4.970177257525084e-06, "loss": 0.2152, "step": 27875 }, { "epoch": 1.1, "grad_norm": 1.890308908845425, "learning_rate": 4.970135451505017e-06, "loss": 0.2066, "step": 27900 }, { "epoch": 1.1, "grad_norm": 1.5472152303564, "learning_rate": 4.97009364548495e-06, "loss": 0.2295, "step": 27925 }, { "epoch": 1.1, "grad_norm": 1.8402540061996924, "learning_rate": 4.970051839464883e-06, "loss": 0.2103, "step": 27950 }, { "epoch": 1.1, "grad_norm": 1.5521269697737237, "learning_rate": 4.970010033444816e-06, "loss": 0.1894, "step": 27975 }, { "epoch": 1.1, "grad_norm": 1.1789685652154698, "learning_rate": 4.96996822742475e-06, "loss": 0.1998, "step": 28000 }, { "epoch": 1.1, "grad_norm": 1.971887880644852, "learning_rate": 4.969926421404682e-06, "loss": 0.2217, "step": 28025 }, { "epoch": 1.1, "grad_norm": 1.6583518083353077, "learning_rate": 4.969884615384616e-06, "loss": 0.2077, "step": 28050 }, { "epoch": 1.1, "grad_norm": 1.2231691799156768, "learning_rate": 4.969842809364549e-06, "loss": 0.2074, "step": 28075 }, { "epoch": 1.11, "grad_norm": 1.4124682639555552, "learning_rate": 4.969801003344482e-06, "loss": 0.2002, "step": 28100 }, { "epoch": 1.11, "grad_norm": 1.5315963028247968, "learning_rate": 4.969759197324415e-06, "loss": 0.2116, "step": 28125 }, { "epoch": 1.11, "grad_norm": 0.774063539155429, "learning_rate": 4.969717391304348e-06, "loss": 0.2003, "step": 28150 }, { "epoch": 1.11, "grad_norm": 1.3548270043032278, "learning_rate": 4.969675585284282e-06, "loss": 0.2266, "step": 28175 }, { "epoch": 1.11, "grad_norm": 1.5113091523893083, "learning_rate": 4.969633779264215e-06, "loss": 0.2182, "step": 28200 }, { "epoch": 1.11, "grad_norm": 0.9251487332267831, "learning_rate": 4.9695936454849505e-06, "loss": 0.2202, "step": 28225 }, { "epoch": 1.11, "grad_norm": 1.8324301345828689, "learning_rate": 4.969551839464883e-06, "loss": 0.2255, "step": 28250 }, { "epoch": 1.11, "grad_norm": 1.4775932179452143, "learning_rate": 4.969510033444817e-06, "loss": 0.1994, "step": 28275 }, { "epoch": 1.11, "grad_norm": 1.6333919121168157, "learning_rate": 4.969468227424749e-06, "loss": 0.2081, "step": 28300 }, { "epoch": 1.11, "grad_norm": 0.864925684984239, "learning_rate": 4.969426421404683e-06, "loss": 0.1797, "step": 28325 }, { "epoch": 1.12, "grad_norm": 1.9096612346707407, "learning_rate": 4.969384615384616e-06, "loss": 0.206, "step": 28350 }, { "epoch": 1.12, "grad_norm": 1.9576478394690897, "learning_rate": 4.969342809364549e-06, "loss": 0.2197, "step": 28375 }, { "epoch": 1.12, "grad_norm": 1.5136863296140723, "learning_rate": 4.969301003344482e-06, "loss": 0.199, "step": 28400 }, { "epoch": 1.12, "grad_norm": 1.2010364526097577, "learning_rate": 4.9692591973244154e-06, "loss": 0.2107, "step": 28425 }, { "epoch": 1.12, "grad_norm": 1.407440767715517, "learning_rate": 4.969217391304348e-06, "loss": 0.1996, "step": 28450 }, { "epoch": 1.12, "grad_norm": 2.037763973567924, "learning_rate": 4.969175585284282e-06, "loss": 0.1935, "step": 28475 }, { "epoch": 1.12, "grad_norm": 1.7243218825000752, "learning_rate": 4.969133779264214e-06, "loss": 0.2144, "step": 28500 }, { "epoch": 1.12, "grad_norm": 1.973852579552326, "learning_rate": 4.969091973244148e-06, "loss": 0.1863, "step": 28525 }, { "epoch": 1.12, "grad_norm": 1.5798095723052332, "learning_rate": 4.969050167224081e-06, "loss": 0.2177, "step": 28550 }, { "epoch": 1.12, "grad_norm": 1.4624355487653509, "learning_rate": 4.969008361204013e-06, "loss": 0.2158, "step": 28575 }, { "epoch": 1.13, "grad_norm": 1.2634512898863046, "learning_rate": 4.968966555183947e-06, "loss": 0.1805, "step": 28600 }, { "epoch": 1.13, "grad_norm": 1.292016189175459, "learning_rate": 4.9689247491638796e-06, "loss": 0.1972, "step": 28625 }, { "epoch": 1.13, "grad_norm": 1.887658715804907, "learning_rate": 4.968882943143813e-06, "loss": 0.1994, "step": 28650 }, { "epoch": 1.13, "grad_norm": 1.5728950499609098, "learning_rate": 4.968841137123746e-06, "loss": 0.2084, "step": 28675 }, { "epoch": 1.13, "grad_norm": 1.5170270635605654, "learning_rate": 4.968799331103679e-06, "loss": 0.1836, "step": 28700 }, { "epoch": 1.13, "grad_norm": 1.182392469384069, "learning_rate": 4.968757525083612e-06, "loss": 0.1927, "step": 28725 }, { "epoch": 1.13, "grad_norm": 1.467566207323012, "learning_rate": 4.968715719063546e-06, "loss": 0.211, "step": 28750 }, { "epoch": 1.13, "grad_norm": 1.272554103873533, "learning_rate": 4.968673913043478e-06, "loss": 0.2185, "step": 28775 }, { "epoch": 1.13, "grad_norm": 1.8133824165206656, "learning_rate": 4.968632107023412e-06, "loss": 0.2006, "step": 28800 }, { "epoch": 1.13, "grad_norm": 1.1311871034158731, "learning_rate": 4.9685903010033445e-06, "loss": 0.2058, "step": 28825 }, { "epoch": 1.13, "grad_norm": 1.5883722748526226, "learning_rate": 4.968548494983278e-06, "loss": 0.2064, "step": 28850 }, { "epoch": 1.14, "grad_norm": 2.5904261682283662, "learning_rate": 4.968506688963211e-06, "loss": 0.2024, "step": 28875 }, { "epoch": 1.14, "grad_norm": 1.1478828955712104, "learning_rate": 4.968464882943144e-06, "loss": 0.235, "step": 28900 }, { "epoch": 1.14, "grad_norm": 1.630161077340849, "learning_rate": 4.968423076923077e-06, "loss": 0.1974, "step": 28925 }, { "epoch": 1.14, "grad_norm": 1.8662904772136388, "learning_rate": 4.9683812709030106e-06, "loss": 0.2017, "step": 28950 }, { "epoch": 1.14, "grad_norm": 1.8506149538880028, "learning_rate": 4.968339464882943e-06, "loss": 0.2079, "step": 28975 }, { "epoch": 1.14, "grad_norm": 1.7198221954597992, "learning_rate": 4.968297658862877e-06, "loss": 0.2012, "step": 29000 }, { "epoch": 1.14, "grad_norm": 2.2468499981047616, "learning_rate": 4.9682558528428095e-06, "loss": 0.2241, "step": 29025 }, { "epoch": 1.14, "grad_norm": 1.871449485888433, "learning_rate": 4.968214046822743e-06, "loss": 0.2259, "step": 29050 }, { "epoch": 1.14, "grad_norm": 1.2013415537996155, "learning_rate": 4.968172240802676e-06, "loss": 0.1966, "step": 29075 }, { "epoch": 1.14, "grad_norm": 1.8646445635350821, "learning_rate": 4.968130434782609e-06, "loss": 0.2081, "step": 29100 }, { "epoch": 1.15, "grad_norm": 1.7362617133719327, "learning_rate": 4.968088628762542e-06, "loss": 0.2033, "step": 29125 }, { "epoch": 1.15, "grad_norm": 1.9583229334368888, "learning_rate": 4.9680468227424756e-06, "loss": 0.2195, "step": 29150 }, { "epoch": 1.15, "grad_norm": 1.9756496260469272, "learning_rate": 4.968005016722408e-06, "loss": 0.2138, "step": 29175 }, { "epoch": 1.15, "grad_norm": 1.8136961850820263, "learning_rate": 4.967963210702342e-06, "loss": 0.1989, "step": 29200 }, { "epoch": 1.15, "grad_norm": 0.8737358384053303, "learning_rate": 4.9679214046822745e-06, "loss": 0.1914, "step": 29225 }, { "epoch": 1.15, "grad_norm": 1.8604986074744494, "learning_rate": 4.96788127090301e-06, "loss": 0.2203, "step": 29250 }, { "epoch": 1.15, "grad_norm": 1.349940979840139, "learning_rate": 4.967839464882944e-06, "loss": 0.2278, "step": 29275 }, { "epoch": 1.15, "grad_norm": 1.8282393434449213, "learning_rate": 4.9677976588628766e-06, "loss": 0.2236, "step": 29300 }, { "epoch": 1.15, "grad_norm": 2.5180697118545403, "learning_rate": 4.96775585284281e-06, "loss": 0.2302, "step": 29325 }, { "epoch": 1.15, "grad_norm": 1.8114203235294122, "learning_rate": 4.967714046822743e-06, "loss": 0.1866, "step": 29350 }, { "epoch": 1.16, "grad_norm": 1.3440834890531017, "learning_rate": 4.967672240802676e-06, "loss": 0.2201, "step": 29375 }, { "epoch": 1.16, "grad_norm": 2.3107235122075616, "learning_rate": 4.967630434782609e-06, "loss": 0.2028, "step": 29400 }, { "epoch": 1.16, "grad_norm": 1.329479003913603, "learning_rate": 4.967588628762543e-06, "loss": 0.1991, "step": 29425 }, { "epoch": 1.16, "grad_norm": 1.7727406488523492, "learning_rate": 4.967546822742475e-06, "loss": 0.2009, "step": 29450 }, { "epoch": 1.16, "grad_norm": 1.6058323726648749, "learning_rate": 4.967505016722409e-06, "loss": 0.2118, "step": 29475 }, { "epoch": 1.16, "grad_norm": 1.2674018423135895, "learning_rate": 4.9674632107023415e-06, "loss": 0.2054, "step": 29500 }, { "epoch": 1.16, "grad_norm": 1.7825070235459493, "learning_rate": 4.967421404682274e-06, "loss": 0.2254, "step": 29525 }, { "epoch": 1.16, "grad_norm": 2.043836906677806, "learning_rate": 4.967379598662208e-06, "loss": 0.2188, "step": 29550 }, { "epoch": 1.16, "grad_norm": 2.0876415569608375, "learning_rate": 4.9673377926421405e-06, "loss": 0.2041, "step": 29575 }, { "epoch": 1.16, "grad_norm": 1.804802164781188, "learning_rate": 4.967295986622074e-06, "loss": 0.1919, "step": 29600 }, { "epoch": 1.17, "grad_norm": 2.183654886861711, "learning_rate": 4.967254180602007e-06, "loss": 0.1988, "step": 29625 }, { "epoch": 1.17, "grad_norm": 1.8097074308078962, "learning_rate": 4.96721237458194e-06, "loss": 0.2106, "step": 29650 }, { "epoch": 1.17, "grad_norm": 1.8124071979254552, "learning_rate": 4.967170568561873e-06, "loss": 0.2138, "step": 29675 }, { "epoch": 1.17, "grad_norm": 1.4488631236080394, "learning_rate": 4.9671287625418065e-06, "loss": 0.2091, "step": 29700 }, { "epoch": 1.17, "grad_norm": 1.3220655136855233, "learning_rate": 4.967086956521739e-06, "loss": 0.2245, "step": 29725 }, { "epoch": 1.17, "grad_norm": 1.5358184563094297, "learning_rate": 4.967045150501673e-06, "loss": 0.1831, "step": 29750 }, { "epoch": 1.17, "grad_norm": 1.5721622318412813, "learning_rate": 4.9670033444816055e-06, "loss": 0.2164, "step": 29775 }, { "epoch": 1.17, "grad_norm": 1.3569247099530573, "learning_rate": 4.966961538461539e-06, "loss": 0.2083, "step": 29800 }, { "epoch": 1.17, "grad_norm": 1.919524305810182, "learning_rate": 4.966919732441472e-06, "loss": 0.2012, "step": 29825 }, { "epoch": 1.17, "grad_norm": 1.6943687241906875, "learning_rate": 4.966877926421405e-06, "loss": 0.223, "step": 29850 }, { "epoch": 1.18, "grad_norm": 1.6313740896270725, "learning_rate": 4.966836120401338e-06, "loss": 0.2194, "step": 29875 }, { "epoch": 1.18, "grad_norm": 1.6895490084997424, "learning_rate": 4.9667943143812715e-06, "loss": 0.2077, "step": 29900 }, { "epoch": 1.18, "grad_norm": 1.601228861172109, "learning_rate": 4.966752508361204e-06, "loss": 0.1814, "step": 29925 }, { "epoch": 1.18, "grad_norm": 2.219366909148872, "learning_rate": 4.966710702341138e-06, "loss": 0.2117, "step": 29950 }, { "epoch": 1.18, "grad_norm": 1.2499897794948742, "learning_rate": 4.9666688963210704e-06, "loss": 0.1791, "step": 29975 }, { "epoch": 1.18, "grad_norm": 1.3071238159646315, "learning_rate": 4.966627090301004e-06, "loss": 0.2244, "step": 30000 }, { "epoch": 1.18, "eval_loss": 0.5361328125, "eval_runtime": 11518.7802, "eval_samples_per_second": 0.822, "eval_steps_per_second": 0.051, "eval_wer": 0.12534787574602255, "step": 30000 }, { "epoch": 1.18, "grad_norm": 1.7823343242326686, "learning_rate": 4.966585284280937e-06, "loss": 0.2065, "step": 30025 }, { "epoch": 1.18, "grad_norm": 1.5221486291926747, "learning_rate": 4.96654347826087e-06, "loss": 0.1987, "step": 30050 }, { "epoch": 1.18, "grad_norm": 1.319001734849566, "learning_rate": 4.966501672240803e-06, "loss": 0.1983, "step": 30075 }, { "epoch": 1.18, "grad_norm": 1.7989713632094604, "learning_rate": 4.9664598662207365e-06, "loss": 0.2139, "step": 30100 }, { "epoch": 1.19, "grad_norm": 1.6123524206023738, "learning_rate": 4.966418060200669e-06, "loss": 0.2362, "step": 30125 }, { "epoch": 1.19, "grad_norm": 1.1978865463050377, "learning_rate": 4.966376254180603e-06, "loss": 0.2037, "step": 30150 }, { "epoch": 1.19, "grad_norm": 1.3585992261357787, "learning_rate": 4.966334448160535e-06, "loss": 0.2092, "step": 30175 }, { "epoch": 1.19, "grad_norm": 1.5811291808324892, "learning_rate": 4.966292642140468e-06, "loss": 0.195, "step": 30200 }, { "epoch": 1.19, "grad_norm": 1.6853682024289205, "learning_rate": 4.966250836120402e-06, "loss": 0.2238, "step": 30225 }, { "epoch": 1.19, "grad_norm": 1.7722744670231796, "learning_rate": 4.9662107023411375e-06, "loss": 0.2075, "step": 30250 }, { "epoch": 1.19, "grad_norm": 1.881184080308395, "learning_rate": 4.966168896321071e-06, "loss": 0.1813, "step": 30275 }, { "epoch": 1.19, "grad_norm": 1.0946512002383884, "learning_rate": 4.966127090301004e-06, "loss": 0.2055, "step": 30300 }, { "epoch": 1.19, "grad_norm": 1.3313241329178456, "learning_rate": 4.966085284280937e-06, "loss": 0.2141, "step": 30325 }, { "epoch": 1.19, "grad_norm": 1.3531092149433521, "learning_rate": 4.96604347826087e-06, "loss": 0.1969, "step": 30350 }, { "epoch": 1.19, "grad_norm": 1.9444146528281916, "learning_rate": 4.9660016722408035e-06, "loss": 0.2217, "step": 30375 }, { "epoch": 1.2, "grad_norm": 1.1850621451492553, "learning_rate": 4.965959866220736e-06, "loss": 0.2144, "step": 30400 }, { "epoch": 1.2, "grad_norm": 1.8003297169728758, "learning_rate": 4.96591806020067e-06, "loss": 0.1884, "step": 30425 }, { "epoch": 1.2, "grad_norm": 2.301931614004386, "learning_rate": 4.9658762541806025e-06, "loss": 0.2166, "step": 30450 }, { "epoch": 1.2, "grad_norm": 1.4598479583328665, "learning_rate": 4.965834448160535e-06, "loss": 0.2008, "step": 30475 }, { "epoch": 1.2, "grad_norm": 1.4011597406764777, "learning_rate": 4.965792642140469e-06, "loss": 0.2057, "step": 30500 }, { "epoch": 1.2, "grad_norm": 2.0890831441737023, "learning_rate": 4.965750836120401e-06, "loss": 0.2002, "step": 30525 }, { "epoch": 1.2, "grad_norm": 1.0317922862792932, "learning_rate": 4.965709030100335e-06, "loss": 0.2098, "step": 30550 }, { "epoch": 1.2, "grad_norm": 2.3339129809340755, "learning_rate": 4.965667224080268e-06, "loss": 0.2073, "step": 30575 }, { "epoch": 1.2, "grad_norm": 1.3910047992276036, "learning_rate": 4.965625418060201e-06, "loss": 0.2241, "step": 30600 }, { "epoch": 1.2, "grad_norm": 1.937110026502965, "learning_rate": 4.965583612040134e-06, "loss": 0.2048, "step": 30625 }, { "epoch": 1.21, "grad_norm": 2.4355058481059215, "learning_rate": 4.9655418060200674e-06, "loss": 0.2099, "step": 30650 }, { "epoch": 1.21, "grad_norm": 1.5529814044707928, "learning_rate": 4.9655e-06, "loss": 0.1749, "step": 30675 }, { "epoch": 1.21, "grad_norm": 2.247459234736527, "learning_rate": 4.965458193979934e-06, "loss": 0.2111, "step": 30700 }, { "epoch": 1.21, "grad_norm": 1.839503804605756, "learning_rate": 4.965416387959866e-06, "loss": 0.2074, "step": 30725 }, { "epoch": 1.21, "grad_norm": 0.8945095656682283, "learning_rate": 4.9653745819398e-06, "loss": 0.1872, "step": 30750 }, { "epoch": 1.21, "grad_norm": 1.8914319288406296, "learning_rate": 4.965332775919733e-06, "loss": 0.2194, "step": 30775 }, { "epoch": 1.21, "grad_norm": 1.8360889606904163, "learning_rate": 4.965290969899666e-06, "loss": 0.2243, "step": 30800 }, { "epoch": 1.21, "grad_norm": 1.7850880114912815, "learning_rate": 4.965249163879599e-06, "loss": 0.2205, "step": 30825 }, { "epoch": 1.21, "grad_norm": 1.596884049166259, "learning_rate": 4.9652073578595324e-06, "loss": 0.221, "step": 30850 }, { "epoch": 1.21, "grad_norm": 1.9903428122299165, "learning_rate": 4.965165551839465e-06, "loss": 0.2093, "step": 30875 }, { "epoch": 1.22, "grad_norm": 1.5969796027518697, "learning_rate": 4.965123745819399e-06, "loss": 0.2066, "step": 30900 }, { "epoch": 1.22, "grad_norm": 1.4232465281818925, "learning_rate": 4.965081939799331e-06, "loss": 0.1918, "step": 30925 }, { "epoch": 1.22, "grad_norm": 1.6069161329849821, "learning_rate": 4.965040133779265e-06, "loss": 0.2285, "step": 30950 }, { "epoch": 1.22, "grad_norm": 1.2697223955884902, "learning_rate": 4.964998327759198e-06, "loss": 0.2016, "step": 30975 }, { "epoch": 1.22, "grad_norm": 1.2930314056749161, "learning_rate": 4.964956521739131e-06, "loss": 0.2227, "step": 31000 }, { "epoch": 1.22, "grad_norm": 1.1212062680858963, "learning_rate": 4.964914715719064e-06, "loss": 0.2011, "step": 31025 }, { "epoch": 1.22, "grad_norm": 1.9658032132564844, "learning_rate": 4.964872909698997e-06, "loss": 0.2285, "step": 31050 }, { "epoch": 1.22, "grad_norm": 2.3693017625531883, "learning_rate": 4.96483110367893e-06, "loss": 0.216, "step": 31075 }, { "epoch": 1.22, "grad_norm": 1.9956647662239217, "learning_rate": 4.964789297658864e-06, "loss": 0.2185, "step": 31100 }, { "epoch": 1.22, "grad_norm": 1.202282939519141, "learning_rate": 4.964747491638796e-06, "loss": 0.2048, "step": 31125 }, { "epoch": 1.23, "grad_norm": 1.6666987073505568, "learning_rate": 4.964705685618729e-06, "loss": 0.2308, "step": 31150 }, { "epoch": 1.23, "grad_norm": 1.9958955477076095, "learning_rate": 4.964663879598663e-06, "loss": 0.2031, "step": 31175 }, { "epoch": 1.23, "grad_norm": 2.4079118493249374, "learning_rate": 4.964622073578595e-06, "loss": 0.207, "step": 31200 }, { "epoch": 1.23, "grad_norm": 1.6437168762178533, "learning_rate": 4.964580267558529e-06, "loss": 0.2088, "step": 31225 }, { "epoch": 1.23, "grad_norm": 2.0817719152582503, "learning_rate": 4.964540133779265e-06, "loss": 0.1928, "step": 31250 }, { "epoch": 1.23, "grad_norm": 0.9167781534885544, "learning_rate": 4.964498327759198e-06, "loss": 0.1914, "step": 31275 }, { "epoch": 1.23, "grad_norm": 1.9066328276379372, "learning_rate": 4.964456521739131e-06, "loss": 0.2069, "step": 31300 }, { "epoch": 1.23, "grad_norm": 1.9776175873759618, "learning_rate": 4.9644147157190645e-06, "loss": 0.2121, "step": 31325 }, { "epoch": 1.23, "grad_norm": 1.3144155588840976, "learning_rate": 4.964372909698997e-06, "loss": 0.1927, "step": 31350 }, { "epoch": 1.23, "grad_norm": 1.4071128683786893, "learning_rate": 4.96433110367893e-06, "loss": 0.1953, "step": 31375 }, { "epoch": 1.24, "grad_norm": 1.0973251877720371, "learning_rate": 4.964289297658863e-06, "loss": 0.2166, "step": 31400 }, { "epoch": 1.24, "grad_norm": 1.6240742135179362, "learning_rate": 4.964247491638796e-06, "loss": 0.2036, "step": 31425 }, { "epoch": 1.24, "grad_norm": 1.9782177053431977, "learning_rate": 4.96420568561873e-06, "loss": 0.2265, "step": 31450 }, { "epoch": 1.24, "grad_norm": 1.5819617657187213, "learning_rate": 4.964163879598662e-06, "loss": 0.1893, "step": 31475 }, { "epoch": 1.24, "grad_norm": 2.0167586079498867, "learning_rate": 4.964122073578596e-06, "loss": 0.2175, "step": 31500 }, { "epoch": 1.24, "grad_norm": 1.6880449215025803, "learning_rate": 4.964080267558529e-06, "loss": 0.2047, "step": 31525 }, { "epoch": 1.24, "grad_norm": 1.3911746414902157, "learning_rate": 4.964038461538462e-06, "loss": 0.2083, "step": 31550 }, { "epoch": 1.24, "grad_norm": 1.6077306397848325, "learning_rate": 4.963996655518395e-06, "loss": 0.2038, "step": 31575 }, { "epoch": 1.24, "grad_norm": 0.8547104842544427, "learning_rate": 4.963954849498328e-06, "loss": 0.1909, "step": 31600 }, { "epoch": 1.24, "grad_norm": 1.2230790698786718, "learning_rate": 4.963913043478261e-06, "loss": 0.2144, "step": 31625 }, { "epoch": 1.25, "grad_norm": 1.6684024560293576, "learning_rate": 4.963871237458195e-06, "loss": 0.2259, "step": 31650 }, { "epoch": 1.25, "grad_norm": 1.4888511188162146, "learning_rate": 4.963829431438127e-06, "loss": 0.1831, "step": 31675 }, { "epoch": 1.25, "grad_norm": 2.078967899199761, "learning_rate": 4.963787625418061e-06, "loss": 0.2105, "step": 31700 }, { "epoch": 1.25, "grad_norm": 1.510441172072911, "learning_rate": 4.9637458193979936e-06, "loss": 0.1981, "step": 31725 }, { "epoch": 1.25, "grad_norm": 1.646878944946446, "learning_rate": 4.963704013377927e-06, "loss": 0.2097, "step": 31750 }, { "epoch": 1.25, "grad_norm": 1.8328540352865563, "learning_rate": 4.96366220735786e-06, "loss": 0.1886, "step": 31775 }, { "epoch": 1.25, "grad_norm": 1.1374340269320808, "learning_rate": 4.963620401337793e-06, "loss": 0.191, "step": 31800 }, { "epoch": 1.25, "grad_norm": 1.9988391709765514, "learning_rate": 4.963578595317726e-06, "loss": 0.2059, "step": 31825 }, { "epoch": 1.25, "grad_norm": 1.1328657514428446, "learning_rate": 4.96353678929766e-06, "loss": 0.2285, "step": 31850 }, { "epoch": 1.25, "grad_norm": 1.9394059197287883, "learning_rate": 4.963494983277592e-06, "loss": 0.2108, "step": 31875 }, { "epoch": 1.25, "grad_norm": 1.0461897291306157, "learning_rate": 4.963453177257526e-06, "loss": 0.2116, "step": 31900 }, { "epoch": 1.26, "grad_norm": 1.3526856502795168, "learning_rate": 4.9634113712374585e-06, "loss": 0.2267, "step": 31925 }, { "epoch": 1.26, "grad_norm": 1.8917535020913774, "learning_rate": 4.963369565217392e-06, "loss": 0.2163, "step": 31950 }, { "epoch": 1.26, "grad_norm": 1.4229763247970235, "learning_rate": 4.963327759197325e-06, "loss": 0.1974, "step": 31975 }, { "epoch": 1.26, "grad_norm": 2.091442768892304, "learning_rate": 4.963285953177258e-06, "loss": 0.2062, "step": 32000 }, { "epoch": 1.26, "grad_norm": 1.513372953462432, "learning_rate": 4.963244147157191e-06, "loss": 0.2136, "step": 32025 }, { "epoch": 1.26, "grad_norm": 1.43008542750968, "learning_rate": 4.9632023411371246e-06, "loss": 0.2091, "step": 32050 }, { "epoch": 1.26, "grad_norm": 2.199026863323161, "learning_rate": 4.963160535117057e-06, "loss": 0.2209, "step": 32075 }, { "epoch": 1.26, "grad_norm": 1.5908105703435922, "learning_rate": 4.96311872909699e-06, "loss": 0.2005, "step": 32100 }, { "epoch": 1.26, "grad_norm": 2.4953338699146124, "learning_rate": 4.9630769230769235e-06, "loss": 0.2449, "step": 32125 }, { "epoch": 1.26, "grad_norm": 1.502883147552933, "learning_rate": 4.963035117056856e-06, "loss": 0.2131, "step": 32150 }, { "epoch": 1.27, "grad_norm": 1.751577299753483, "learning_rate": 4.96299331103679e-06, "loss": 0.217, "step": 32175 }, { "epoch": 1.27, "grad_norm": 1.394809862880557, "learning_rate": 4.9629515050167225e-06, "loss": 0.2091, "step": 32200 }, { "epoch": 1.27, "grad_norm": 2.2060001394612345, "learning_rate": 4.962909698996656e-06, "loss": 0.2069, "step": 32225 }, { "epoch": 1.27, "grad_norm": 1.9708415738072285, "learning_rate": 4.962869565217392e-06, "loss": 0.2026, "step": 32250 }, { "epoch": 1.27, "grad_norm": 2.001205087461328, "learning_rate": 4.962827759197325e-06, "loss": 0.2032, "step": 32275 }, { "epoch": 1.27, "grad_norm": 1.266515112142646, "learning_rate": 4.962785953177258e-06, "loss": 0.2161, "step": 32300 }, { "epoch": 1.27, "grad_norm": 1.5658738110557742, "learning_rate": 4.962744147157191e-06, "loss": 0.196, "step": 32325 }, { "epoch": 1.27, "grad_norm": 1.9588945387829928, "learning_rate": 4.962702341137124e-06, "loss": 0.2175, "step": 32350 }, { "epoch": 1.27, "grad_norm": 1.5897518353484381, "learning_rate": 4.962660535117057e-06, "loss": 0.2134, "step": 32375 }, { "epoch": 1.27, "grad_norm": 1.2728706316819134, "learning_rate": 4.9626187290969906e-06, "loss": 0.1953, "step": 32400 }, { "epoch": 1.28, "grad_norm": 2.391470189830352, "learning_rate": 4.962576923076923e-06, "loss": 0.21, "step": 32425 }, { "epoch": 1.28, "grad_norm": 1.5455063030077856, "learning_rate": 4.962535117056857e-06, "loss": 0.1955, "step": 32450 }, { "epoch": 1.28, "grad_norm": 1.8416205159711543, "learning_rate": 4.9624933110367895e-06, "loss": 0.1971, "step": 32475 }, { "epoch": 1.28, "grad_norm": 1.6448546575874654, "learning_rate": 4.962451505016723e-06, "loss": 0.2074, "step": 32500 }, { "epoch": 1.28, "grad_norm": 3.3357850095647246, "learning_rate": 4.962409698996656e-06, "loss": 0.2314, "step": 32525 }, { "epoch": 1.28, "grad_norm": 1.9861129354509446, "learning_rate": 4.962367892976589e-06, "loss": 0.2014, "step": 32550 }, { "epoch": 1.28, "grad_norm": 1.3127887409214314, "learning_rate": 4.962326086956522e-06, "loss": 0.2087, "step": 32575 }, { "epoch": 1.28, "grad_norm": 1.5428255216234126, "learning_rate": 4.9622842809364555e-06, "loss": 0.1927, "step": 32600 }, { "epoch": 1.28, "grad_norm": 2.265547190895961, "learning_rate": 4.962242474916388e-06, "loss": 0.2127, "step": 32625 }, { "epoch": 1.28, "grad_norm": 1.1891946036863335, "learning_rate": 4.962200668896322e-06, "loss": 0.1993, "step": 32650 }, { "epoch": 1.29, "grad_norm": 1.7958715126981217, "learning_rate": 4.9621588628762545e-06, "loss": 0.214, "step": 32675 }, { "epoch": 1.29, "grad_norm": 1.6405593238103398, "learning_rate": 4.962117056856188e-06, "loss": 0.2097, "step": 32700 }, { "epoch": 1.29, "grad_norm": 1.5373448233299254, "learning_rate": 4.962075250836121e-06, "loss": 0.2151, "step": 32725 }, { "epoch": 1.29, "grad_norm": 2.0068111663697987, "learning_rate": 4.962033444816054e-06, "loss": 0.1893, "step": 32750 }, { "epoch": 1.29, "grad_norm": 2.0679584686902626, "learning_rate": 4.961991638795987e-06, "loss": 0.2236, "step": 32775 }, { "epoch": 1.29, "grad_norm": 1.2454742075576373, "learning_rate": 4.9619498327759205e-06, "loss": 0.1778, "step": 32800 }, { "epoch": 1.29, "grad_norm": 1.494891691231001, "learning_rate": 4.961908026755853e-06, "loss": 0.2111, "step": 32825 }, { "epoch": 1.29, "grad_norm": 1.972901323601928, "learning_rate": 4.961866220735787e-06, "loss": 0.2329, "step": 32850 }, { "epoch": 1.29, "grad_norm": 1.4863004691478272, "learning_rate": 4.9618244147157195e-06, "loss": 0.2036, "step": 32875 }, { "epoch": 1.29, "grad_norm": 1.2875445717253988, "learning_rate": 4.961782608695653e-06, "loss": 0.2026, "step": 32900 }, { "epoch": 1.3, "grad_norm": 1.5038045805937086, "learning_rate": 4.961740802675586e-06, "loss": 0.207, "step": 32925 }, { "epoch": 1.3, "grad_norm": 1.649781568126332, "learning_rate": 4.961698996655519e-06, "loss": 0.1957, "step": 32950 }, { "epoch": 1.3, "grad_norm": 1.451803928656282, "learning_rate": 4.961657190635452e-06, "loss": 0.2139, "step": 32975 }, { "epoch": 1.3, "grad_norm": 2.096939736947867, "learning_rate": 4.9616153846153855e-06, "loss": 0.2167, "step": 33000 }, { "epoch": 1.3, "grad_norm": 1.5804173291083985, "learning_rate": 4.961573578595318e-06, "loss": 0.2084, "step": 33025 }, { "epoch": 1.3, "grad_norm": 1.900713897980342, "learning_rate": 4.961531772575251e-06, "loss": 0.2094, "step": 33050 }, { "epoch": 1.3, "grad_norm": 1.8047019720202782, "learning_rate": 4.9614899665551844e-06, "loss": 0.2045, "step": 33075 }, { "epoch": 1.3, "grad_norm": 1.663316027429542, "learning_rate": 4.961448160535117e-06, "loss": 0.2075, "step": 33100 }, { "epoch": 1.3, "grad_norm": 1.9090718281643422, "learning_rate": 4.961406354515051e-06, "loss": 0.1984, "step": 33125 }, { "epoch": 1.3, "grad_norm": 1.8038054052477845, "learning_rate": 4.961364548494983e-06, "loss": 0.2071, "step": 33150 }, { "epoch": 1.31, "grad_norm": 1.9119519029509469, "learning_rate": 4.961322742474917e-06, "loss": 0.2132, "step": 33175 }, { "epoch": 1.31, "grad_norm": 1.9349267181687344, "learning_rate": 4.96128093645485e-06, "loss": 0.2159, "step": 33200 }, { "epoch": 1.31, "grad_norm": 1.550384787253934, "learning_rate": 4.961239130434783e-06, "loss": 0.1922, "step": 33225 }, { "epoch": 1.31, "grad_norm": 2.1539016786904788, "learning_rate": 4.961198996655519e-06, "loss": 0.2158, "step": 33250 }, { "epoch": 1.31, "grad_norm": 2.1496086242086543, "learning_rate": 4.961157190635452e-06, "loss": 0.2176, "step": 33275 }, { "epoch": 1.31, "grad_norm": 1.6905461372752362, "learning_rate": 4.961115384615384e-06, "loss": 0.1866, "step": 33300 }, { "epoch": 1.31, "grad_norm": 1.1509723343168803, "learning_rate": 4.961073578595318e-06, "loss": 0.2218, "step": 33325 }, { "epoch": 1.31, "grad_norm": 1.0867727056995953, "learning_rate": 4.961031772575251e-06, "loss": 0.2073, "step": 33350 }, { "epoch": 1.31, "grad_norm": 1.5010059727175604, "learning_rate": 4.960989966555184e-06, "loss": 0.2009, "step": 33375 }, { "epoch": 1.31, "grad_norm": 2.3178677208541214, "learning_rate": 4.960948160535117e-06, "loss": 0.2053, "step": 33400 }, { "epoch": 1.31, "grad_norm": 1.5525133283022474, "learning_rate": 4.9609063545150504e-06, "loss": 0.1934, "step": 33425 }, { "epoch": 1.32, "grad_norm": 1.5592614964414115, "learning_rate": 4.960864548494984e-06, "loss": 0.2024, "step": 33450 }, { "epoch": 1.32, "grad_norm": 1.1553832312538863, "learning_rate": 4.960822742474917e-06, "loss": 0.2109, "step": 33475 }, { "epoch": 1.32, "grad_norm": 1.8607289492966907, "learning_rate": 4.96078093645485e-06, "loss": 0.1962, "step": 33500 }, { "epoch": 1.32, "grad_norm": 1.503789861400716, "learning_rate": 4.960739130434783e-06, "loss": 0.2088, "step": 33525 }, { "epoch": 1.32, "grad_norm": 1.8929282998591728, "learning_rate": 4.9606973244147165e-06, "loss": 0.2209, "step": 33550 }, { "epoch": 1.32, "grad_norm": 1.0496277495832924, "learning_rate": 4.960655518394649e-06, "loss": 0.1954, "step": 33575 }, { "epoch": 1.32, "grad_norm": 1.4165638236647178, "learning_rate": 4.960613712374583e-06, "loss": 0.184, "step": 33600 }, { "epoch": 1.32, "grad_norm": 1.3598210314527397, "learning_rate": 4.960571906354515e-06, "loss": 0.2002, "step": 33625 }, { "epoch": 1.32, "grad_norm": 1.4783079603213292, "learning_rate": 4.960530100334449e-06, "loss": 0.2033, "step": 33650 }, { "epoch": 1.32, "grad_norm": 1.9540334215512212, "learning_rate": 4.960488294314382e-06, "loss": 0.2249, "step": 33675 }, { "epoch": 1.33, "grad_norm": 1.1034996218987867, "learning_rate": 4.960446488294315e-06, "loss": 0.1903, "step": 33700 }, { "epoch": 1.33, "grad_norm": 2.3139815085334834, "learning_rate": 4.960404682274248e-06, "loss": 0.1939, "step": 33725 }, { "epoch": 1.33, "grad_norm": 0.9968667345143176, "learning_rate": 4.9603628762541814e-06, "loss": 0.2057, "step": 33750 }, { "epoch": 1.33, "grad_norm": 1.5320555945517205, "learning_rate": 4.960321070234114e-06, "loss": 0.2073, "step": 33775 }, { "epoch": 1.33, "grad_norm": 1.282025684245721, "learning_rate": 4.960279264214048e-06, "loss": 0.2133, "step": 33800 }, { "epoch": 1.33, "grad_norm": 1.777269893458473, "learning_rate": 4.96023745819398e-06, "loss": 0.2243, "step": 33825 }, { "epoch": 1.33, "grad_norm": 1.5943182163866918, "learning_rate": 4.960195652173914e-06, "loss": 0.2153, "step": 33850 }, { "epoch": 1.33, "grad_norm": 1.379151340617046, "learning_rate": 4.960153846153847e-06, "loss": 0.2043, "step": 33875 }, { "epoch": 1.33, "grad_norm": 1.1521103544665638, "learning_rate": 4.96011204013378e-06, "loss": 0.2238, "step": 33900 }, { "epoch": 1.33, "grad_norm": 1.2182907874261988, "learning_rate": 4.960070234113713e-06, "loss": 0.2097, "step": 33925 }, { "epoch": 1.34, "grad_norm": 1.387589834089496, "learning_rate": 4.960028428093646e-06, "loss": 0.1899, "step": 33950 }, { "epoch": 1.34, "grad_norm": 1.5191996191680612, "learning_rate": 4.959986622073579e-06, "loss": 0.2107, "step": 33975 }, { "epoch": 1.34, "grad_norm": 1.4337854691253606, "learning_rate": 4.959944816053512e-06, "loss": 0.2131, "step": 34000 }, { "epoch": 1.34, "grad_norm": 1.8589806989817563, "learning_rate": 4.959903010033445e-06, "loss": 0.2162, "step": 34025 }, { "epoch": 1.34, "grad_norm": 2.303077233635094, "learning_rate": 4.959861204013378e-06, "loss": 0.203, "step": 34050 }, { "epoch": 1.34, "grad_norm": 1.5672615001886725, "learning_rate": 4.959819397993312e-06, "loss": 0.2073, "step": 34075 }, { "epoch": 1.34, "grad_norm": 1.163916859493643, "learning_rate": 4.959777591973244e-06, "loss": 0.2, "step": 34100 }, { "epoch": 1.34, "grad_norm": 1.6051761264160214, "learning_rate": 4.959735785953178e-06, "loss": 0.2052, "step": 34125 }, { "epoch": 1.34, "grad_norm": 1.3556812408093033, "learning_rate": 4.9596939799331106e-06, "loss": 0.2308, "step": 34150 }, { "epoch": 1.34, "grad_norm": 1.3137216096393576, "learning_rate": 4.959652173913044e-06, "loss": 0.2077, "step": 34175 }, { "epoch": 1.35, "grad_norm": 1.5157679496546062, "learning_rate": 4.959610367892977e-06, "loss": 0.2038, "step": 34200 }, { "epoch": 1.35, "grad_norm": 2.236311175171304, "learning_rate": 4.95956856187291e-06, "loss": 0.1943, "step": 34225 }, { "epoch": 1.35, "grad_norm": 1.4890581893083967, "learning_rate": 4.959528428093645e-06, "loss": 0.2026, "step": 34250 }, { "epoch": 1.35, "grad_norm": 2.0697802575341915, "learning_rate": 4.959486622073579e-06, "loss": 0.2175, "step": 34275 }, { "epoch": 1.35, "grad_norm": 2.0491147263632876, "learning_rate": 4.9594448160535116e-06, "loss": 0.2117, "step": 34300 }, { "epoch": 1.35, "grad_norm": 1.46097970439395, "learning_rate": 4.959403010033445e-06, "loss": 0.1839, "step": 34325 }, { "epoch": 1.35, "grad_norm": 2.014542005118715, "learning_rate": 4.959361204013378e-06, "loss": 0.2171, "step": 34350 }, { "epoch": 1.35, "grad_norm": 1.3888128070932415, "learning_rate": 4.959319397993311e-06, "loss": 0.1863, "step": 34375 }, { "epoch": 1.35, "grad_norm": 1.7106357207309615, "learning_rate": 4.959277591973244e-06, "loss": 0.1865, "step": 34400 }, { "epoch": 1.35, "grad_norm": 1.7455015346010385, "learning_rate": 4.959235785953178e-06, "loss": 0.2059, "step": 34425 }, { "epoch": 1.36, "grad_norm": 0.9415841930783881, "learning_rate": 4.95919397993311e-06, "loss": 0.2034, "step": 34450 }, { "epoch": 1.36, "grad_norm": 2.3452049148270158, "learning_rate": 4.959152173913044e-06, "loss": 0.2055, "step": 34475 }, { "epoch": 1.36, "grad_norm": 1.4039121644682209, "learning_rate": 4.9591103678929765e-06, "loss": 0.1914, "step": 34500 }, { "epoch": 1.36, "grad_norm": 1.4796081378428405, "learning_rate": 4.95906856187291e-06, "loss": 0.2172, "step": 34525 }, { "epoch": 1.36, "grad_norm": 1.9045711516273565, "learning_rate": 4.959026755852843e-06, "loss": 0.1927, "step": 34550 }, { "epoch": 1.36, "grad_norm": 1.7547093039225692, "learning_rate": 4.958984949832776e-06, "loss": 0.228, "step": 34575 }, { "epoch": 1.36, "grad_norm": 1.9785998947274708, "learning_rate": 4.95894314381271e-06, "loss": 0.2026, "step": 34600 }, { "epoch": 1.36, "grad_norm": 1.2354236137870933, "learning_rate": 4.958901337792643e-06, "loss": 0.214, "step": 34625 }, { "epoch": 1.36, "grad_norm": 1.3622120646886287, "learning_rate": 4.958859531772576e-06, "loss": 0.1868, "step": 34650 }, { "epoch": 1.36, "grad_norm": 1.573347646006675, "learning_rate": 4.958817725752509e-06, "loss": 0.1997, "step": 34675 }, { "epoch": 1.37, "grad_norm": 1.8322249477184158, "learning_rate": 4.958775919732442e-06, "loss": 0.2188, "step": 34700 }, { "epoch": 1.37, "grad_norm": 1.4524561976880421, "learning_rate": 4.958734113712375e-06, "loss": 0.2097, "step": 34725 }, { "epoch": 1.37, "grad_norm": 1.259548939098752, "learning_rate": 4.958692307692309e-06, "loss": 0.2152, "step": 34750 }, { "epoch": 1.37, "grad_norm": 1.477510113801705, "learning_rate": 4.958650501672241e-06, "loss": 0.1898, "step": 34775 }, { "epoch": 1.37, "grad_norm": 1.2690260150220294, "learning_rate": 4.958608695652175e-06, "loss": 0.2182, "step": 34800 }, { "epoch": 1.37, "grad_norm": 1.5844701742556588, "learning_rate": 4.9585668896321076e-06, "loss": 0.1961, "step": 34825 }, { "epoch": 1.37, "grad_norm": 2.273853459237293, "learning_rate": 4.958525083612041e-06, "loss": 0.1922, "step": 34850 }, { "epoch": 1.37, "grad_norm": 1.8175047287317434, "learning_rate": 4.958483277591974e-06, "loss": 0.2038, "step": 34875 }, { "epoch": 1.37, "grad_norm": 1.836042614757162, "learning_rate": 4.958441471571907e-06, "loss": 0.2168, "step": 34900 }, { "epoch": 1.37, "grad_norm": 1.3796229553862953, "learning_rate": 4.958399665551839e-06, "loss": 0.2062, "step": 34925 }, { "epoch": 1.37, "grad_norm": 2.3102116543430355, "learning_rate": 4.958357859531773e-06, "loss": 0.2078, "step": 34950 }, { "epoch": 1.38, "grad_norm": 1.360851412118792, "learning_rate": 4.9583160535117054e-06, "loss": 0.1949, "step": 34975 }, { "epoch": 1.38, "grad_norm": 1.2283119602827541, "learning_rate": 4.958274247491639e-06, "loss": 0.2074, "step": 35000 }, { "epoch": 1.38, "grad_norm": 2.13008899942473, "learning_rate": 4.9582324414715725e-06, "loss": 0.2098, "step": 35025 }, { "epoch": 1.38, "grad_norm": 1.033370788229089, "learning_rate": 4.958190635451505e-06, "loss": 0.2019, "step": 35050 }, { "epoch": 1.38, "grad_norm": 1.2650220644179653, "learning_rate": 4.958148829431439e-06, "loss": 0.2119, "step": 35075 }, { "epoch": 1.38, "grad_norm": 1.6882292388102216, "learning_rate": 4.9581070234113715e-06, "loss": 0.204, "step": 35100 }, { "epoch": 1.38, "grad_norm": 1.5380929836431863, "learning_rate": 4.958065217391305e-06, "loss": 0.2127, "step": 35125 }, { "epoch": 1.38, "grad_norm": 2.1509889226964374, "learning_rate": 4.958023411371238e-06, "loss": 0.1943, "step": 35150 }, { "epoch": 1.38, "grad_norm": 2.4326488451072428, "learning_rate": 4.957981605351171e-06, "loss": 0.1855, "step": 35175 }, { "epoch": 1.38, "grad_norm": 1.383235843774178, "learning_rate": 4.957939799331104e-06, "loss": 0.2132, "step": 35200 }, { "epoch": 1.39, "grad_norm": 1.5270733379843522, "learning_rate": 4.9578979933110375e-06, "loss": 0.1998, "step": 35225 }, { "epoch": 1.39, "grad_norm": 1.4047334942033511, "learning_rate": 4.9578578595317725e-06, "loss": 0.2344, "step": 35250 }, { "epoch": 1.39, "grad_norm": 1.3056747609702148, "learning_rate": 4.957816053511706e-06, "loss": 0.1981, "step": 35275 }, { "epoch": 1.39, "grad_norm": 1.8432393756629464, "learning_rate": 4.957774247491639e-06, "loss": 0.1912, "step": 35300 }, { "epoch": 1.39, "grad_norm": 1.255197218187179, "learning_rate": 4.957732441471572e-06, "loss": 0.1947, "step": 35325 }, { "epoch": 1.39, "grad_norm": 1.873243680140684, "learning_rate": 4.957690635451505e-06, "loss": 0.2078, "step": 35350 }, { "epoch": 1.39, "grad_norm": 2.048000455141224, "learning_rate": 4.9576488294314385e-06, "loss": 0.2059, "step": 35375 }, { "epoch": 1.39, "grad_norm": 3.4680651759429137, "learning_rate": 4.957607023411371e-06, "loss": 0.22, "step": 35400 }, { "epoch": 1.39, "grad_norm": 1.53131994025086, "learning_rate": 4.957565217391305e-06, "loss": 0.2256, "step": 35425 }, { "epoch": 1.39, "grad_norm": 1.8548841688370339, "learning_rate": 4.9575234113712375e-06, "loss": 0.2143, "step": 35450 }, { "epoch": 1.4, "grad_norm": 1.6403964177995343, "learning_rate": 4.957481605351171e-06, "loss": 0.2128, "step": 35475 }, { "epoch": 1.4, "grad_norm": 1.6156960137162244, "learning_rate": 4.957439799331104e-06, "loss": 0.2203, "step": 35500 }, { "epoch": 1.4, "grad_norm": 1.5593538805691032, "learning_rate": 4.957397993311037e-06, "loss": 0.2111, "step": 35525 }, { "epoch": 1.4, "grad_norm": 1.3940280246709056, "learning_rate": 4.95735618729097e-06, "loss": 0.2053, "step": 35550 }, { "epoch": 1.4, "grad_norm": 2.2706106350468436, "learning_rate": 4.9573143812709035e-06, "loss": 0.2074, "step": 35575 }, { "epoch": 1.4, "grad_norm": 0.9178553441649687, "learning_rate": 4.957272575250836e-06, "loss": 0.1981, "step": 35600 }, { "epoch": 1.4, "grad_norm": 1.4261776676677842, "learning_rate": 4.95723076923077e-06, "loss": 0.1853, "step": 35625 }, { "epoch": 1.4, "grad_norm": 1.1696659611524045, "learning_rate": 4.9571889632107024e-06, "loss": 0.2134, "step": 35650 }, { "epoch": 1.4, "grad_norm": 0.7602557743841436, "learning_rate": 4.957147157190636e-06, "loss": 0.2126, "step": 35675 }, { "epoch": 1.4, "grad_norm": 2.492907454571274, "learning_rate": 4.9571053511705695e-06, "loss": 0.216, "step": 35700 }, { "epoch": 1.41, "grad_norm": 2.367209562020745, "learning_rate": 4.957063545150502e-06, "loss": 0.2195, "step": 35725 }, { "epoch": 1.41, "grad_norm": 1.8227019149029202, "learning_rate": 4.957021739130436e-06, "loss": 0.1937, "step": 35750 }, { "epoch": 1.41, "grad_norm": 1.2682518351707983, "learning_rate": 4.9569799331103685e-06, "loss": 0.2019, "step": 35775 }, { "epoch": 1.41, "grad_norm": 1.262927556471107, "learning_rate": 4.956938127090302e-06, "loss": 0.1928, "step": 35800 }, { "epoch": 1.41, "grad_norm": 1.42063661058105, "learning_rate": 4.956896321070235e-06, "loss": 0.2019, "step": 35825 }, { "epoch": 1.41, "grad_norm": 2.4089099655471107, "learning_rate": 4.956854515050168e-06, "loss": 0.2014, "step": 35850 }, { "epoch": 1.41, "grad_norm": 1.0092033649001106, "learning_rate": 4.9568127090301e-06, "loss": 0.2002, "step": 35875 }, { "epoch": 1.41, "grad_norm": 1.9427722122886302, "learning_rate": 4.956770903010034e-06, "loss": 0.2208, "step": 35900 }, { "epoch": 1.41, "grad_norm": 1.6812779714137775, "learning_rate": 4.956729096989966e-06, "loss": 0.1991, "step": 35925 }, { "epoch": 1.41, "grad_norm": 1.3752830801753009, "learning_rate": 4.9566872909699e-06, "loss": 0.2051, "step": 35950 }, { "epoch": 1.42, "grad_norm": 1.6857256233320776, "learning_rate": 4.956645484949833e-06, "loss": 0.206, "step": 35975 }, { "epoch": 1.42, "grad_norm": 1.84815296780552, "learning_rate": 4.956603678929766e-06, "loss": 0.2146, "step": 36000 }, { "epoch": 1.42, "grad_norm": 1.2132768250520185, "learning_rate": 4.956561872909699e-06, "loss": 0.1858, "step": 36025 }, { "epoch": 1.42, "grad_norm": 1.3922235347411074, "learning_rate": 4.956520066889632e-06, "loss": 0.1841, "step": 36050 }, { "epoch": 1.42, "grad_norm": 1.4109203186054706, "learning_rate": 4.956478260869565e-06, "loss": 0.2042, "step": 36075 }, { "epoch": 1.42, "grad_norm": 1.076333380375572, "learning_rate": 4.956436454849499e-06, "loss": 0.2103, "step": 36100 }, { "epoch": 1.42, "grad_norm": 2.28510984363013, "learning_rate": 4.956394648829432e-06, "loss": 0.1989, "step": 36125 }, { "epoch": 1.42, "grad_norm": 0.9777457396941752, "learning_rate": 4.956352842809365e-06, "loss": 0.2305, "step": 36150 }, { "epoch": 1.42, "grad_norm": 1.5988408503297, "learning_rate": 4.9563110367892984e-06, "loss": 0.2053, "step": 36175 }, { "epoch": 1.42, "grad_norm": 1.6674285290812338, "learning_rate": 4.956269230769231e-06, "loss": 0.2098, "step": 36200 }, { "epoch": 1.43, "grad_norm": 2.1275665989695556, "learning_rate": 4.956227424749165e-06, "loss": 0.2157, "step": 36225 }, { "epoch": 1.43, "grad_norm": 1.298315469696719, "learning_rate": 4.9561872909699e-06, "loss": 0.1852, "step": 36250 }, { "epoch": 1.43, "grad_norm": 1.4806491205789225, "learning_rate": 4.956145484949833e-06, "loss": 0.2023, "step": 36275 }, { "epoch": 1.43, "grad_norm": 1.7496689306471362, "learning_rate": 4.956103678929766e-06, "loss": 0.2057, "step": 36300 }, { "epoch": 1.43, "grad_norm": 1.723557779898001, "learning_rate": 4.9560618729096995e-06, "loss": 0.217, "step": 36325 }, { "epoch": 1.43, "grad_norm": 1.700269519444899, "learning_rate": 4.956020066889632e-06, "loss": 0.1761, "step": 36350 }, { "epoch": 1.43, "grad_norm": 3.320993952778013, "learning_rate": 4.955978260869566e-06, "loss": 0.1874, "step": 36375 }, { "epoch": 1.43, "grad_norm": 1.5904614343317816, "learning_rate": 4.955936454849498e-06, "loss": 0.2055, "step": 36400 }, { "epoch": 1.43, "grad_norm": 1.1408285586096996, "learning_rate": 4.955894648829432e-06, "loss": 0.2046, "step": 36425 }, { "epoch": 1.43, "grad_norm": 1.7747940283874366, "learning_rate": 4.955852842809365e-06, "loss": 0.1819, "step": 36450 }, { "epoch": 1.43, "grad_norm": 1.4295834467887427, "learning_rate": 4.955811036789298e-06, "loss": 0.1995, "step": 36475 }, { "epoch": 1.44, "grad_norm": 1.2445542660386075, "learning_rate": 4.955769230769231e-06, "loss": 0.1994, "step": 36500 }, { "epoch": 1.44, "grad_norm": 1.4077690160385432, "learning_rate": 4.9557274247491644e-06, "loss": 0.2321, "step": 36525 }, { "epoch": 1.44, "grad_norm": 1.0359673240598635, "learning_rate": 4.955685618729097e-06, "loss": 0.2202, "step": 36550 }, { "epoch": 1.44, "grad_norm": 1.4659278382451848, "learning_rate": 4.955643812709031e-06, "loss": 0.2263, "step": 36575 }, { "epoch": 1.44, "grad_norm": 1.3295779187540848, "learning_rate": 4.955602006688963e-06, "loss": 0.2192, "step": 36600 }, { "epoch": 1.44, "grad_norm": 1.1071332705463832, "learning_rate": 4.955560200668897e-06, "loss": 0.2132, "step": 36625 }, { "epoch": 1.44, "grad_norm": 2.265229352325454, "learning_rate": 4.95551839464883e-06, "loss": 0.1988, "step": 36650 }, { "epoch": 1.44, "grad_norm": 0.9800704407638031, "learning_rate": 4.955476588628763e-06, "loss": 0.2032, "step": 36675 }, { "epoch": 1.44, "grad_norm": 0.9925356070357615, "learning_rate": 4.955434782608696e-06, "loss": 0.215, "step": 36700 }, { "epoch": 1.44, "grad_norm": 1.5204227004534854, "learning_rate": 4.955392976588629e-06, "loss": 0.2107, "step": 36725 }, { "epoch": 1.45, "grad_norm": 2.4452324885329624, "learning_rate": 4.955351170568562e-06, "loss": 0.2164, "step": 36750 }, { "epoch": 1.45, "grad_norm": 1.959056822372053, "learning_rate": 4.955309364548496e-06, "loss": 0.217, "step": 36775 }, { "epoch": 1.45, "grad_norm": 2.3043549385517452, "learning_rate": 4.955267558528428e-06, "loss": 0.2211, "step": 36800 }, { "epoch": 1.45, "grad_norm": 2.2806395544186833, "learning_rate": 4.955225752508361e-06, "loss": 0.1887, "step": 36825 }, { "epoch": 1.45, "grad_norm": 1.3016123333945022, "learning_rate": 4.955183946488295e-06, "loss": 0.2025, "step": 36850 }, { "epoch": 1.45, "grad_norm": 1.4292394564351278, "learning_rate": 4.955142140468227e-06, "loss": 0.2076, "step": 36875 }, { "epoch": 1.45, "grad_norm": 1.9148503359389857, "learning_rate": 4.955100334448161e-06, "loss": 0.2046, "step": 36900 }, { "epoch": 1.45, "grad_norm": 1.6141787965473307, "learning_rate": 4.9550585284280935e-06, "loss": 0.2014, "step": 36925 }, { "epoch": 1.45, "grad_norm": 1.9101519644967175, "learning_rate": 4.955016722408027e-06, "loss": 0.2111, "step": 36950 }, { "epoch": 1.45, "grad_norm": 1.7403141468024392, "learning_rate": 4.95497491638796e-06, "loss": 0.2077, "step": 36975 }, { "epoch": 1.46, "grad_norm": 1.7207730170828357, "learning_rate": 4.954933110367893e-06, "loss": 0.1927, "step": 37000 }, { "epoch": 1.46, "grad_norm": 1.8605826806771688, "learning_rate": 4.954891304347826e-06, "loss": 0.2184, "step": 37025 }, { "epoch": 1.46, "grad_norm": 1.6175520560976995, "learning_rate": 4.9548494983277596e-06, "loss": 0.2098, "step": 37050 }, { "epoch": 1.46, "grad_norm": 1.8085251236732929, "learning_rate": 4.954807692307692e-06, "loss": 0.2257, "step": 37075 }, { "epoch": 1.46, "grad_norm": 1.3604634040424115, "learning_rate": 4.954765886287626e-06, "loss": 0.2019, "step": 37100 }, { "epoch": 1.46, "grad_norm": 1.6502918851829131, "learning_rate": 4.9547240802675585e-06, "loss": 0.1919, "step": 37125 }, { "epoch": 1.46, "grad_norm": 1.7617138039619495, "learning_rate": 4.954682274247492e-06, "loss": 0.2058, "step": 37150 }, { "epoch": 1.46, "grad_norm": 1.4810928710156535, "learning_rate": 4.954640468227425e-06, "loss": 0.1996, "step": 37175 }, { "epoch": 1.46, "grad_norm": 1.5350605129741175, "learning_rate": 4.954598662207358e-06, "loss": 0.2055, "step": 37200 }, { "epoch": 1.46, "grad_norm": 2.0090394477173805, "learning_rate": 4.954556856187292e-06, "loss": 0.1838, "step": 37225 }, { "epoch": 1.47, "grad_norm": 1.2115676280987648, "learning_rate": 4.954516722408027e-06, "loss": 0.2182, "step": 37250 }, { "epoch": 1.47, "grad_norm": 2.2809098857963606, "learning_rate": 4.95447491638796e-06, "loss": 0.2116, "step": 37275 }, { "epoch": 1.47, "grad_norm": 1.7286107298277917, "learning_rate": 4.954433110367893e-06, "loss": 0.1862, "step": 37300 }, { "epoch": 1.47, "grad_norm": 2.3745604372273177, "learning_rate": 4.954391304347827e-06, "loss": 0.2003, "step": 37325 }, { "epoch": 1.47, "grad_norm": 2.179529589103856, "learning_rate": 4.954349498327759e-06, "loss": 0.2023, "step": 37350 }, { "epoch": 1.47, "grad_norm": 1.379480802423559, "learning_rate": 4.954307692307693e-06, "loss": 0.2044, "step": 37375 }, { "epoch": 1.47, "grad_norm": 2.704416220877015, "learning_rate": 4.9542658862876256e-06, "loss": 0.2179, "step": 37400 }, { "epoch": 1.47, "grad_norm": 1.2195378522173297, "learning_rate": 4.954224080267559e-06, "loss": 0.2084, "step": 37425 }, { "epoch": 1.47, "grad_norm": 1.548215462518736, "learning_rate": 4.954182274247492e-06, "loss": 0.2095, "step": 37450 }, { "epoch": 1.47, "grad_norm": 1.488083163244097, "learning_rate": 4.954140468227425e-06, "loss": 0.1998, "step": 37475 }, { "epoch": 1.48, "grad_norm": 1.735700092430531, "learning_rate": 4.954098662207358e-06, "loss": 0.2143, "step": 37500 }, { "epoch": 1.48, "grad_norm": 1.3001115190622823, "learning_rate": 4.954056856187292e-06, "loss": 0.2107, "step": 37525 }, { "epoch": 1.48, "grad_norm": 1.724639356179923, "learning_rate": 4.954015050167224e-06, "loss": 0.2267, "step": 37550 }, { "epoch": 1.48, "grad_norm": 1.836013236024464, "learning_rate": 4.953973244147158e-06, "loss": 0.223, "step": 37575 }, { "epoch": 1.48, "grad_norm": 1.63422094494171, "learning_rate": 4.9539314381270905e-06, "loss": 0.1909, "step": 37600 }, { "epoch": 1.48, "grad_norm": 1.6384726135540142, "learning_rate": 4.953889632107024e-06, "loss": 0.2025, "step": 37625 }, { "epoch": 1.48, "grad_norm": 1.9438112258099434, "learning_rate": 4.953847826086957e-06, "loss": 0.1905, "step": 37650 }, { "epoch": 1.48, "grad_norm": 2.0403963480844216, "learning_rate": 4.95380602006689e-06, "loss": 0.2039, "step": 37675 }, { "epoch": 1.48, "grad_norm": 1.5742246476102364, "learning_rate": 4.953764214046823e-06, "loss": 0.2102, "step": 37700 }, { "epoch": 1.48, "grad_norm": 1.485529188887855, "learning_rate": 4.953722408026757e-06, "loss": 0.2192, "step": 37725 }, { "epoch": 1.49, "grad_norm": 1.761172543764633, "learning_rate": 4.953680602006689e-06, "loss": 0.2076, "step": 37750 }, { "epoch": 1.49, "grad_norm": 0.9982263034997709, "learning_rate": 4.953638795986622e-06, "loss": 0.2182, "step": 37775 }, { "epoch": 1.49, "grad_norm": 2.2204246293357652, "learning_rate": 4.9535969899665555e-06, "loss": 0.2172, "step": 37800 }, { "epoch": 1.49, "grad_norm": 1.3041545197815172, "learning_rate": 4.953555183946488e-06, "loss": 0.21, "step": 37825 }, { "epoch": 1.49, "grad_norm": 2.6384467244877707, "learning_rate": 4.953513377926422e-06, "loss": 0.2086, "step": 37850 }, { "epoch": 1.49, "grad_norm": 1.8368676858921458, "learning_rate": 4.9534715719063545e-06, "loss": 0.2193, "step": 37875 }, { "epoch": 1.49, "grad_norm": 1.68655526333874, "learning_rate": 4.953429765886288e-06, "loss": 0.2122, "step": 37900 }, { "epoch": 1.49, "grad_norm": 2.283888377497305, "learning_rate": 4.953387959866221e-06, "loss": 0.2013, "step": 37925 }, { "epoch": 1.49, "grad_norm": 1.1147280818501166, "learning_rate": 4.953346153846154e-06, "loss": 0.2119, "step": 37950 }, { "epoch": 1.49, "grad_norm": 2.131251084192741, "learning_rate": 4.953304347826087e-06, "loss": 0.1857, "step": 37975 }, { "epoch": 1.49, "grad_norm": 1.879560812566554, "learning_rate": 4.9532625418060205e-06, "loss": 0.2138, "step": 38000 }, { "epoch": 1.5, "grad_norm": 1.1426136680022196, "learning_rate": 4.953220735785953e-06, "loss": 0.1844, "step": 38025 }, { "epoch": 1.5, "grad_norm": 1.5973757490635936, "learning_rate": 4.953178929765887e-06, "loss": 0.2084, "step": 38050 }, { "epoch": 1.5, "grad_norm": 2.8939082265559706, "learning_rate": 4.9531371237458194e-06, "loss": 0.2129, "step": 38075 }, { "epoch": 1.5, "grad_norm": 1.496482622396757, "learning_rate": 4.953095317725753e-06, "loss": 0.2145, "step": 38100 }, { "epoch": 1.5, "grad_norm": 1.593350227368928, "learning_rate": 4.953053511705686e-06, "loss": 0.2064, "step": 38125 }, { "epoch": 1.5, "grad_norm": 1.4347207475008577, "learning_rate": 4.953011705685619e-06, "loss": 0.2101, "step": 38150 }, { "epoch": 1.5, "grad_norm": 1.5414470961110491, "learning_rate": 4.952969899665552e-06, "loss": 0.1994, "step": 38175 }, { "epoch": 1.5, "grad_norm": 1.2999288110788667, "learning_rate": 4.9529280936454855e-06, "loss": 0.19, "step": 38200 }, { "epoch": 1.5, "grad_norm": 1.3896836661728793, "learning_rate": 4.952886287625418e-06, "loss": 0.2014, "step": 38225 }, { "epoch": 1.5, "grad_norm": 1.395012428279921, "learning_rate": 4.952846153846154e-06, "loss": 0.1805, "step": 38250 }, { "epoch": 1.51, "grad_norm": 1.4366727872711265, "learning_rate": 4.9528043478260875e-06, "loss": 0.19, "step": 38275 }, { "epoch": 1.51, "grad_norm": 1.2645834734452694, "learning_rate": 4.95276254180602e-06, "loss": 0.1943, "step": 38300 }, { "epoch": 1.51, "grad_norm": 1.2963361249917345, "learning_rate": 4.952720735785954e-06, "loss": 0.1991, "step": 38325 }, { "epoch": 1.51, "grad_norm": 1.4866651771754051, "learning_rate": 4.9526789297658865e-06, "loss": 0.2245, "step": 38350 }, { "epoch": 1.51, "grad_norm": 0.9546225113518195, "learning_rate": 4.95263712374582e-06, "loss": 0.222, "step": 38375 }, { "epoch": 1.51, "grad_norm": 1.7859246566220357, "learning_rate": 4.952595317725753e-06, "loss": 0.2001, "step": 38400 }, { "epoch": 1.51, "grad_norm": 1.919164444138617, "learning_rate": 4.952553511705686e-06, "loss": 0.1928, "step": 38425 }, { "epoch": 1.51, "grad_norm": 1.3516142246793494, "learning_rate": 4.952511705685619e-06, "loss": 0.2054, "step": 38450 }, { "epoch": 1.51, "grad_norm": 1.8640375790937023, "learning_rate": 4.9524698996655525e-06, "loss": 0.1886, "step": 38475 }, { "epoch": 1.51, "grad_norm": 1.8124716196702333, "learning_rate": 4.952428093645485e-06, "loss": 0.2149, "step": 38500 }, { "epoch": 1.52, "grad_norm": 1.5193585343880305, "learning_rate": 4.952386287625419e-06, "loss": 0.1966, "step": 38525 }, { "epoch": 1.52, "grad_norm": 1.832736275735568, "learning_rate": 4.9523444816053515e-06, "loss": 0.2038, "step": 38550 }, { "epoch": 1.52, "grad_norm": 1.736944723639645, "learning_rate": 4.952302675585285e-06, "loss": 0.1933, "step": 38575 }, { "epoch": 1.52, "grad_norm": 1.630971173062863, "learning_rate": 4.952260869565218e-06, "loss": 0.2004, "step": 38600 }, { "epoch": 1.52, "grad_norm": 1.8873355888793588, "learning_rate": 4.952219063545151e-06, "loss": 0.2176, "step": 38625 }, { "epoch": 1.52, "grad_norm": 1.3335092351991724, "learning_rate": 4.952177257525084e-06, "loss": 0.2189, "step": 38650 }, { "epoch": 1.52, "grad_norm": 2.1938959308869856, "learning_rate": 4.9521354515050175e-06, "loss": 0.2158, "step": 38675 }, { "epoch": 1.52, "grad_norm": 1.1451216064786816, "learning_rate": 4.95209364548495e-06, "loss": 0.1996, "step": 38700 }, { "epoch": 1.52, "grad_norm": 1.1085029403514461, "learning_rate": 4.952051839464883e-06, "loss": 0.2116, "step": 38725 }, { "epoch": 1.52, "grad_norm": 1.4562454164261576, "learning_rate": 4.9520100334448164e-06, "loss": 0.2174, "step": 38750 }, { "epoch": 1.53, "grad_norm": 1.9786520420055989, "learning_rate": 4.951968227424749e-06, "loss": 0.1918, "step": 38775 }, { "epoch": 1.53, "grad_norm": 1.5097439606342058, "learning_rate": 4.951926421404683e-06, "loss": 0.229, "step": 38800 }, { "epoch": 1.53, "grad_norm": 2.7567248665862696, "learning_rate": 4.951884615384615e-06, "loss": 0.2072, "step": 38825 }, { "epoch": 1.53, "grad_norm": 1.7139284656959257, "learning_rate": 4.951842809364549e-06, "loss": 0.2102, "step": 38850 }, { "epoch": 1.53, "grad_norm": 1.7839798240238915, "learning_rate": 4.951801003344482e-06, "loss": 0.2117, "step": 38875 }, { "epoch": 1.53, "grad_norm": 1.3293739028446008, "learning_rate": 4.951759197324415e-06, "loss": 0.1878, "step": 38900 }, { "epoch": 1.53, "grad_norm": 1.6080849310086829, "learning_rate": 4.951717391304348e-06, "loss": 0.2101, "step": 38925 }, { "epoch": 1.53, "grad_norm": 1.250962569095591, "learning_rate": 4.951675585284281e-06, "loss": 0.1938, "step": 38950 }, { "epoch": 1.53, "grad_norm": 2.1190166838811497, "learning_rate": 4.951633779264214e-06, "loss": 0.2159, "step": 38975 }, { "epoch": 1.53, "grad_norm": 2.4326007393935587, "learning_rate": 4.951591973244148e-06, "loss": 0.1985, "step": 39000 }, { "epoch": 1.54, "grad_norm": 1.6414580007592534, "learning_rate": 4.95155016722408e-06, "loss": 0.2048, "step": 39025 }, { "epoch": 1.54, "grad_norm": 1.9216076329480545, "learning_rate": 4.951508361204014e-06, "loss": 0.2124, "step": 39050 }, { "epoch": 1.54, "grad_norm": 1.2728391425359964, "learning_rate": 4.951466555183947e-06, "loss": 0.2193, "step": 39075 }, { "epoch": 1.54, "grad_norm": 1.5009755698819605, "learning_rate": 4.95142474916388e-06, "loss": 0.2223, "step": 39100 }, { "epoch": 1.54, "grad_norm": 1.9770053209281309, "learning_rate": 4.951382943143813e-06, "loss": 0.2027, "step": 39125 }, { "epoch": 1.54, "grad_norm": 1.6300988064698791, "learning_rate": 4.951341137123746e-06, "loss": 0.2077, "step": 39150 }, { "epoch": 1.54, "grad_norm": 1.561710138010411, "learning_rate": 4.951299331103679e-06, "loss": 0.1915, "step": 39175 }, { "epoch": 1.54, "grad_norm": 2.705460436343843, "learning_rate": 4.951257525083613e-06, "loss": 0.192, "step": 39200 }, { "epoch": 1.54, "grad_norm": 1.5991161773874567, "learning_rate": 4.951215719063545e-06, "loss": 0.2001, "step": 39225 }, { "epoch": 1.54, "grad_norm": 1.2368222120398074, "learning_rate": 4.951175585284281e-06, "loss": 0.1984, "step": 39250 }, { "epoch": 1.55, "grad_norm": 2.787832230883491, "learning_rate": 4.951133779264215e-06, "loss": 0.2138, "step": 39275 }, { "epoch": 1.55, "grad_norm": 1.7658617749710668, "learning_rate": 4.951091973244147e-06, "loss": 0.2084, "step": 39300 }, { "epoch": 1.55, "grad_norm": 1.4069418293510692, "learning_rate": 4.951050167224081e-06, "loss": 0.209, "step": 39325 }, { "epoch": 1.55, "grad_norm": 2.167347269613874, "learning_rate": 4.951008361204014e-06, "loss": 0.2259, "step": 39350 }, { "epoch": 1.55, "grad_norm": 1.4967667199328691, "learning_rate": 4.950966555183947e-06, "loss": 0.1938, "step": 39375 }, { "epoch": 1.55, "grad_norm": 1.1203864757858373, "learning_rate": 4.95092474916388e-06, "loss": 0.1996, "step": 39400 }, { "epoch": 1.55, "grad_norm": 1.8801469194282086, "learning_rate": 4.9508829431438135e-06, "loss": 0.2039, "step": 39425 }, { "epoch": 1.55, "grad_norm": 1.327003950516436, "learning_rate": 4.950841137123746e-06, "loss": 0.2206, "step": 39450 }, { "epoch": 1.55, "grad_norm": 2.41131914892425, "learning_rate": 4.95079933110368e-06, "loss": 0.2014, "step": 39475 }, { "epoch": 1.55, "grad_norm": 2.0819564181136587, "learning_rate": 4.950757525083612e-06, "loss": 0.213, "step": 39500 }, { "epoch": 1.55, "grad_norm": 1.6673113392240486, "learning_rate": 4.950715719063546e-06, "loss": 0.2184, "step": 39525 }, { "epoch": 1.56, "grad_norm": 1.380558230210356, "learning_rate": 4.950673913043479e-06, "loss": 0.1943, "step": 39550 }, { "epoch": 1.56, "grad_norm": 2.1697122354109033, "learning_rate": 4.950632107023412e-06, "loss": 0.1906, "step": 39575 }, { "epoch": 1.56, "grad_norm": 1.9418778704353494, "learning_rate": 4.950590301003345e-06, "loss": 0.189, "step": 39600 }, { "epoch": 1.56, "grad_norm": 1.4589182357598085, "learning_rate": 4.950548494983278e-06, "loss": 0.1947, "step": 39625 }, { "epoch": 1.56, "grad_norm": 1.8193230995535195, "learning_rate": 4.950506688963211e-06, "loss": 0.2207, "step": 39650 }, { "epoch": 1.56, "grad_norm": 1.3338701052042354, "learning_rate": 4.950464882943144e-06, "loss": 0.2067, "step": 39675 }, { "epoch": 1.56, "grad_norm": 1.7410998436865661, "learning_rate": 4.950423076923077e-06, "loss": 0.1927, "step": 39700 }, { "epoch": 1.56, "grad_norm": 1.3042207681102427, "learning_rate": 4.95038127090301e-06, "loss": 0.2183, "step": 39725 }, { "epoch": 1.56, "grad_norm": 1.7924918618547276, "learning_rate": 4.950339464882944e-06, "loss": 0.2164, "step": 39750 }, { "epoch": 1.56, "grad_norm": 1.9474636138675756, "learning_rate": 4.950297658862876e-06, "loss": 0.205, "step": 39775 }, { "epoch": 1.57, "grad_norm": 1.6557124540827683, "learning_rate": 4.95025585284281e-06, "loss": 0.2181, "step": 39800 }, { "epoch": 1.57, "grad_norm": 1.2497598066214064, "learning_rate": 4.9502140468227426e-06, "loss": 0.2372, "step": 39825 }, { "epoch": 1.57, "grad_norm": 1.6087866416925465, "learning_rate": 4.950172240802676e-06, "loss": 0.2086, "step": 39850 }, { "epoch": 1.57, "grad_norm": 1.4725315047956657, "learning_rate": 4.950130434782609e-06, "loss": 0.2029, "step": 39875 }, { "epoch": 1.57, "grad_norm": 1.4054406114906426, "learning_rate": 4.950088628762542e-06, "loss": 0.1899, "step": 39900 }, { "epoch": 1.57, "grad_norm": 1.705726548553069, "learning_rate": 4.950046822742475e-06, "loss": 0.1959, "step": 39925 }, { "epoch": 1.57, "grad_norm": 1.3147943647253022, "learning_rate": 4.950005016722409e-06, "loss": 0.2099, "step": 39950 }, { "epoch": 1.57, "grad_norm": 1.199900310959841, "learning_rate": 4.949963210702341e-06, "loss": 0.1991, "step": 39975 }, { "epoch": 1.57, "grad_norm": 1.5347734934426658, "learning_rate": 4.949921404682275e-06, "loss": 0.2056, "step": 40000 }, { "epoch": 1.57, "eval_loss": 0.471435546875, "eval_runtime": 11603.9924, "eval_samples_per_second": 0.816, "eval_steps_per_second": 0.051, "eval_wer": 0.12227530846323376, "step": 40000 }, { "epoch": 1.57, "grad_norm": 1.2625514121666555, "learning_rate": 4.9498795986622075e-06, "loss": 0.2043, "step": 40025 }, { "epoch": 1.58, "grad_norm": 1.598268805422133, "learning_rate": 4.949837792642141e-06, "loss": 0.2222, "step": 40050 }, { "epoch": 1.58, "grad_norm": 1.8626188809803597, "learning_rate": 4.949795986622074e-06, "loss": 0.2062, "step": 40075 }, { "epoch": 1.58, "grad_norm": 1.2772100379671945, "learning_rate": 4.949754180602007e-06, "loss": 0.1935, "step": 40100 }, { "epoch": 1.58, "grad_norm": 1.623164374874199, "learning_rate": 4.94971237458194e-06, "loss": 0.2072, "step": 40125 }, { "epoch": 1.58, "grad_norm": 1.8510882784804008, "learning_rate": 4.9496705685618736e-06, "loss": 0.2103, "step": 40150 }, { "epoch": 1.58, "grad_norm": 1.1111748213696224, "learning_rate": 4.949628762541806e-06, "loss": 0.2119, "step": 40175 }, { "epoch": 1.58, "grad_norm": 1.8817631319158592, "learning_rate": 4.94958695652174e-06, "loss": 0.2284, "step": 40200 }, { "epoch": 1.58, "grad_norm": 1.7672470719864912, "learning_rate": 4.9495451505016725e-06, "loss": 0.2094, "step": 40225 }, { "epoch": 1.58, "grad_norm": 1.5564127286984561, "learning_rate": 4.949505016722408e-06, "loss": 0.1975, "step": 40250 }, { "epoch": 1.58, "grad_norm": 1.6106298300859476, "learning_rate": 4.949463210702342e-06, "loss": 0.2036, "step": 40275 }, { "epoch": 1.59, "grad_norm": 1.9484881833986716, "learning_rate": 4.949421404682275e-06, "loss": 0.2186, "step": 40300 }, { "epoch": 1.59, "grad_norm": 1.9152229538502286, "learning_rate": 4.949379598662208e-06, "loss": 0.195, "step": 40325 }, { "epoch": 1.59, "grad_norm": 2.014374946325913, "learning_rate": 4.949337792642141e-06, "loss": 0.2268, "step": 40350 }, { "epoch": 1.59, "grad_norm": 1.4380015003183522, "learning_rate": 4.949295986622074e-06, "loss": 0.2022, "step": 40375 }, { "epoch": 1.59, "grad_norm": 1.8807119242703392, "learning_rate": 4.949254180602007e-06, "loss": 0.2071, "step": 40400 }, { "epoch": 1.59, "grad_norm": 1.2257784971118444, "learning_rate": 4.949212374581941e-06, "loss": 0.2172, "step": 40425 }, { "epoch": 1.59, "grad_norm": 1.3513506180545665, "learning_rate": 4.949170568561873e-06, "loss": 0.2005, "step": 40450 }, { "epoch": 1.59, "grad_norm": 1.7195640433114703, "learning_rate": 4.949128762541807e-06, "loss": 0.2044, "step": 40475 }, { "epoch": 1.59, "grad_norm": 1.8785362354699997, "learning_rate": 4.9490869565217396e-06, "loss": 0.2008, "step": 40500 }, { "epoch": 1.59, "grad_norm": 1.4693582668589034, "learning_rate": 4.949045150501673e-06, "loss": 0.1974, "step": 40525 }, { "epoch": 1.6, "grad_norm": 2.126779893482259, "learning_rate": 4.949003344481606e-06, "loss": 0.2031, "step": 40550 }, { "epoch": 1.6, "grad_norm": 2.0166603572842985, "learning_rate": 4.9489615384615385e-06, "loss": 0.203, "step": 40575 }, { "epoch": 1.6, "grad_norm": 1.530039181431065, "learning_rate": 4.948919732441472e-06, "loss": 0.205, "step": 40600 }, { "epoch": 1.6, "grad_norm": 1.2765710499831335, "learning_rate": 4.948877926421405e-06, "loss": 0.1917, "step": 40625 }, { "epoch": 1.6, "grad_norm": 1.361058220611067, "learning_rate": 4.948836120401338e-06, "loss": 0.1953, "step": 40650 }, { "epoch": 1.6, "grad_norm": 2.077832653814155, "learning_rate": 4.948794314381271e-06, "loss": 0.2052, "step": 40675 }, { "epoch": 1.6, "grad_norm": 1.4160926776241496, "learning_rate": 4.9487525083612045e-06, "loss": 0.2133, "step": 40700 }, { "epoch": 1.6, "grad_norm": 1.0655600758672423, "learning_rate": 4.948710702341137e-06, "loss": 0.1919, "step": 40725 }, { "epoch": 1.6, "grad_norm": 1.6603254897395308, "learning_rate": 4.948668896321071e-06, "loss": 0.2079, "step": 40750 }, { "epoch": 1.6, "grad_norm": 1.768524529622312, "learning_rate": 4.9486270903010035e-06, "loss": 0.2002, "step": 40775 }, { "epoch": 1.61, "grad_norm": 1.7012943976613921, "learning_rate": 4.948585284280937e-06, "loss": 0.2011, "step": 40800 }, { "epoch": 1.61, "grad_norm": 1.7082794174545288, "learning_rate": 4.94854347826087e-06, "loss": 0.2062, "step": 40825 }, { "epoch": 1.61, "grad_norm": 1.928279567494617, "learning_rate": 4.948501672240803e-06, "loss": 0.1939, "step": 40850 }, { "epoch": 1.61, "grad_norm": 1.9975469181079872, "learning_rate": 4.948459866220736e-06, "loss": 0.195, "step": 40875 }, { "epoch": 1.61, "grad_norm": 0.9802397774014037, "learning_rate": 4.9484180602006695e-06, "loss": 0.2166, "step": 40900 }, { "epoch": 1.61, "grad_norm": 1.443205417331069, "learning_rate": 4.948376254180602e-06, "loss": 0.219, "step": 40925 }, { "epoch": 1.61, "grad_norm": 1.8443352124627206, "learning_rate": 4.948334448160536e-06, "loss": 0.1943, "step": 40950 }, { "epoch": 1.61, "grad_norm": 1.193610360908578, "learning_rate": 4.9482926421404685e-06, "loss": 0.197, "step": 40975 }, { "epoch": 1.61, "grad_norm": 1.4623521124140761, "learning_rate": 4.948250836120402e-06, "loss": 0.2081, "step": 41000 }, { "epoch": 1.61, "grad_norm": 1.3374220657812355, "learning_rate": 4.948209030100335e-06, "loss": 0.1975, "step": 41025 }, { "epoch": 1.61, "grad_norm": 1.1336669393881718, "learning_rate": 4.948167224080268e-06, "loss": 0.203, "step": 41050 }, { "epoch": 1.62, "grad_norm": 2.2493898272913384, "learning_rate": 4.948125418060201e-06, "loss": 0.1948, "step": 41075 }, { "epoch": 1.62, "grad_norm": 1.2972621198738996, "learning_rate": 4.9480836120401345e-06, "loss": 0.2192, "step": 41100 }, { "epoch": 1.62, "grad_norm": 2.388290835184551, "learning_rate": 4.948041806020067e-06, "loss": 0.2166, "step": 41125 }, { "epoch": 1.62, "grad_norm": 1.5916665380726456, "learning_rate": 4.948000000000001e-06, "loss": 0.2136, "step": 41150 }, { "epoch": 1.62, "grad_norm": 1.8625346417846564, "learning_rate": 4.9479581939799334e-06, "loss": 0.2046, "step": 41175 }, { "epoch": 1.62, "grad_norm": 1.2016947650644971, "learning_rate": 4.947916387959867e-06, "loss": 0.1867, "step": 41200 }, { "epoch": 1.62, "grad_norm": 1.5740502812375239, "learning_rate": 4.9478745819398e-06, "loss": 0.1927, "step": 41225 }, { "epoch": 1.62, "grad_norm": 1.3675578577012184, "learning_rate": 4.9478344481605355e-06, "loss": 0.1967, "step": 41250 }, { "epoch": 1.62, "grad_norm": 1.3133012100842971, "learning_rate": 4.947792642140469e-06, "loss": 0.2053, "step": 41275 }, { "epoch": 1.62, "grad_norm": 1.8309449231056476, "learning_rate": 4.947750836120402e-06, "loss": 0.2045, "step": 41300 }, { "epoch": 1.63, "grad_norm": 2.000483291522688, "learning_rate": 4.947709030100335e-06, "loss": 0.2107, "step": 41325 }, { "epoch": 1.63, "grad_norm": 1.8991039424577518, "learning_rate": 4.947667224080268e-06, "loss": 0.1881, "step": 41350 }, { "epoch": 1.63, "grad_norm": 1.7183900225861886, "learning_rate": 4.9476254180602015e-06, "loss": 0.1982, "step": 41375 }, { "epoch": 1.63, "grad_norm": 1.5200229992542378, "learning_rate": 4.947583612040134e-06, "loss": 0.1985, "step": 41400 }, { "epoch": 1.63, "grad_norm": 1.420445600987573, "learning_rate": 4.947541806020068e-06, "loss": 0.2126, "step": 41425 }, { "epoch": 1.63, "grad_norm": 2.1764873951692207, "learning_rate": 4.9475000000000005e-06, "loss": 0.21, "step": 41450 }, { "epoch": 1.63, "grad_norm": 1.2261555787798102, "learning_rate": 4.947458193979934e-06, "loss": 0.21, "step": 41475 }, { "epoch": 1.63, "grad_norm": 1.7188400172880676, "learning_rate": 4.947416387959867e-06, "loss": 0.1842, "step": 41500 }, { "epoch": 1.63, "grad_norm": 1.0411835673772099, "learning_rate": 4.9473745819397994e-06, "loss": 0.1884, "step": 41525 }, { "epoch": 1.63, "grad_norm": 0.8882216836071882, "learning_rate": 4.947332775919733e-06, "loss": 0.1994, "step": 41550 }, { "epoch": 1.64, "grad_norm": 1.1029055988375176, "learning_rate": 4.947290969899666e-06, "loss": 0.1998, "step": 41575 }, { "epoch": 1.64, "grad_norm": 1.3852789220034836, "learning_rate": 4.947249163879599e-06, "loss": 0.2331, "step": 41600 }, { "epoch": 1.64, "grad_norm": 1.324731226538153, "learning_rate": 4.947207357859532e-06, "loss": 0.2266, "step": 41625 }, { "epoch": 1.64, "grad_norm": 2.4657036693086165, "learning_rate": 4.9471655518394655e-06, "loss": 0.215, "step": 41650 }, { "epoch": 1.64, "grad_norm": 1.527084020270202, "learning_rate": 4.947123745819398e-06, "loss": 0.1884, "step": 41675 }, { "epoch": 1.64, "grad_norm": 1.6846948970525009, "learning_rate": 4.947081939799332e-06, "loss": 0.2197, "step": 41700 }, { "epoch": 1.64, "grad_norm": 1.3851303237662287, "learning_rate": 4.947040133779264e-06, "loss": 0.2075, "step": 41725 }, { "epoch": 1.64, "grad_norm": 1.734382175640918, "learning_rate": 4.946998327759198e-06, "loss": 0.1901, "step": 41750 }, { "epoch": 1.64, "grad_norm": 1.8727000776687595, "learning_rate": 4.946956521739131e-06, "loss": 0.2099, "step": 41775 }, { "epoch": 1.64, "grad_norm": 1.3410587525533615, "learning_rate": 4.946914715719064e-06, "loss": 0.2144, "step": 41800 }, { "epoch": 1.65, "grad_norm": 2.232771457346267, "learning_rate": 4.946872909698997e-06, "loss": 0.2167, "step": 41825 }, { "epoch": 1.65, "grad_norm": 1.5134483112849184, "learning_rate": 4.9468311036789304e-06, "loss": 0.2034, "step": 41850 }, { "epoch": 1.65, "grad_norm": 1.2186748135314416, "learning_rate": 4.946789297658863e-06, "loss": 0.2016, "step": 41875 }, { "epoch": 1.65, "grad_norm": 1.5576513070546023, "learning_rate": 4.946747491638797e-06, "loss": 0.2147, "step": 41900 }, { "epoch": 1.65, "grad_norm": 2.350090298252575, "learning_rate": 4.946705685618729e-06, "loss": 0.2087, "step": 41925 }, { "epoch": 1.65, "grad_norm": 1.5693305584665407, "learning_rate": 4.946663879598663e-06, "loss": 0.2105, "step": 41950 }, { "epoch": 1.65, "grad_norm": 1.664226564042428, "learning_rate": 4.946622073578596e-06, "loss": 0.2092, "step": 41975 }, { "epoch": 1.65, "grad_norm": 2.5203247472068897, "learning_rate": 4.946580267558529e-06, "loss": 0.21, "step": 42000 }, { "epoch": 1.65, "grad_norm": 1.6732043500656888, "learning_rate": 4.946538461538462e-06, "loss": 0.2172, "step": 42025 }, { "epoch": 1.65, "grad_norm": 2.1170712647253107, "learning_rate": 4.946496655518395e-06, "loss": 0.2095, "step": 42050 }, { "epoch": 1.66, "grad_norm": 2.27085798758864, "learning_rate": 4.946454849498328e-06, "loss": 0.1996, "step": 42075 }, { "epoch": 1.66, "grad_norm": 2.121975214434564, "learning_rate": 4.946413043478262e-06, "loss": 0.1915, "step": 42100 }, { "epoch": 1.66, "grad_norm": 1.8454823137618726, "learning_rate": 4.946371237458194e-06, "loss": 0.2185, "step": 42125 }, { "epoch": 1.66, "grad_norm": 1.7165066242154234, "learning_rate": 4.946329431438128e-06, "loss": 0.2285, "step": 42150 }, { "epoch": 1.66, "grad_norm": 1.1935629947391357, "learning_rate": 4.946287625418061e-06, "loss": 0.2055, "step": 42175 }, { "epoch": 1.66, "grad_norm": 1.6263960825451074, "learning_rate": 4.946245819397994e-06, "loss": 0.1963, "step": 42200 }, { "epoch": 1.66, "grad_norm": 1.5830643088692502, "learning_rate": 4.946204013377927e-06, "loss": 0.1938, "step": 42225 }, { "epoch": 1.66, "grad_norm": 1.188351267211218, "learning_rate": 4.946163879598663e-06, "loss": 0.1972, "step": 42250 }, { "epoch": 1.66, "grad_norm": 1.5228388266516044, "learning_rate": 4.946122073578596e-06, "loss": 0.2146, "step": 42275 }, { "epoch": 1.66, "grad_norm": 1.2417331494377257, "learning_rate": 4.946080267558529e-06, "loss": 0.1901, "step": 42300 }, { "epoch": 1.67, "grad_norm": 1.2050316831091845, "learning_rate": 4.9460384615384625e-06, "loss": 0.1964, "step": 42325 }, { "epoch": 1.67, "grad_norm": 0.7456359383137927, "learning_rate": 4.945996655518395e-06, "loss": 0.2114, "step": 42350 }, { "epoch": 1.67, "grad_norm": 1.1280620070130793, "learning_rate": 4.945954849498329e-06, "loss": 0.204, "step": 42375 }, { "epoch": 1.67, "grad_norm": 1.8154667054830138, "learning_rate": 4.945913043478261e-06, "loss": 0.2017, "step": 42400 }, { "epoch": 1.67, "grad_norm": 1.6384097778731235, "learning_rate": 4.945871237458195e-06, "loss": 0.2066, "step": 42425 }, { "epoch": 1.67, "grad_norm": 1.8156792744274872, "learning_rate": 4.945829431438128e-06, "loss": 0.1958, "step": 42450 }, { "epoch": 1.67, "grad_norm": 1.1599616876765457, "learning_rate": 4.94578762541806e-06, "loss": 0.2014, "step": 42475 }, { "epoch": 1.67, "grad_norm": 1.4247687110866212, "learning_rate": 4.945745819397993e-06, "loss": 0.2277, "step": 42500 }, { "epoch": 1.67, "grad_norm": 1.8355672996659873, "learning_rate": 4.945704013377927e-06, "loss": 0.2099, "step": 42525 }, { "epoch": 1.67, "grad_norm": 2.425217424300148, "learning_rate": 4.94566220735786e-06, "loss": 0.2016, "step": 42550 }, { "epoch": 1.67, "grad_norm": 1.7198208972430582, "learning_rate": 4.945620401337793e-06, "loss": 0.202, "step": 42575 }, { "epoch": 1.68, "grad_norm": 1.1552545949551052, "learning_rate": 4.945578595317726e-06, "loss": 0.221, "step": 42600 }, { "epoch": 1.68, "grad_norm": 1.4029219339538905, "learning_rate": 4.945536789297659e-06, "loss": 0.1987, "step": 42625 }, { "epoch": 1.68, "grad_norm": 2.3882183271546116, "learning_rate": 4.945494983277593e-06, "loss": 0.2081, "step": 42650 }, { "epoch": 1.68, "grad_norm": 0.9199663729648839, "learning_rate": 4.945453177257525e-06, "loss": 0.1973, "step": 42675 }, { "epoch": 1.68, "grad_norm": 0.8901908136062112, "learning_rate": 4.945411371237459e-06, "loss": 0.2012, "step": 42700 }, { "epoch": 1.68, "grad_norm": 1.3827221258424418, "learning_rate": 4.945369565217392e-06, "loss": 0.2079, "step": 42725 }, { "epoch": 1.68, "grad_norm": 1.7149862183278763, "learning_rate": 4.945327759197325e-06, "loss": 0.1983, "step": 42750 }, { "epoch": 1.68, "grad_norm": 1.9186732501542165, "learning_rate": 4.945285953177258e-06, "loss": 0.2022, "step": 42775 }, { "epoch": 1.68, "grad_norm": 1.633325078705575, "learning_rate": 4.945244147157191e-06, "loss": 0.2094, "step": 42800 }, { "epoch": 1.68, "grad_norm": 1.397331371465462, "learning_rate": 4.945202341137124e-06, "loss": 0.2227, "step": 42825 }, { "epoch": 1.69, "grad_norm": 1.4918868776955483, "learning_rate": 4.945160535117058e-06, "loss": 0.214, "step": 42850 }, { "epoch": 1.69, "grad_norm": 1.595591918880962, "learning_rate": 4.94511872909699e-06, "loss": 0.2151, "step": 42875 }, { "epoch": 1.69, "grad_norm": 1.7222991867626707, "learning_rate": 4.945076923076924e-06, "loss": 0.2128, "step": 42900 }, { "epoch": 1.69, "grad_norm": 1.1691471473743391, "learning_rate": 4.9450351170568566e-06, "loss": 0.2075, "step": 42925 }, { "epoch": 1.69, "grad_norm": 1.4662345851129484, "learning_rate": 4.94499331103679e-06, "loss": 0.2125, "step": 42950 }, { "epoch": 1.69, "grad_norm": 1.5717643240108634, "learning_rate": 4.944951505016723e-06, "loss": 0.1802, "step": 42975 }, { "epoch": 1.69, "grad_norm": 1.3287909014275803, "learning_rate": 4.944909698996656e-06, "loss": 0.1952, "step": 43000 }, { "epoch": 1.69, "grad_norm": 1.097159089815716, "learning_rate": 4.944867892976589e-06, "loss": 0.202, "step": 43025 }, { "epoch": 1.69, "grad_norm": 1.9895232385883712, "learning_rate": 4.944826086956523e-06, "loss": 0.2109, "step": 43050 }, { "epoch": 1.69, "grad_norm": 1.5103690475667175, "learning_rate": 4.944784280936455e-06, "loss": 0.189, "step": 43075 }, { "epoch": 1.7, "grad_norm": 1.882631524404233, "learning_rate": 4.944742474916389e-06, "loss": 0.1967, "step": 43100 }, { "epoch": 1.7, "grad_norm": 1.3046423330005568, "learning_rate": 4.9447006688963215e-06, "loss": 0.1788, "step": 43125 }, { "epoch": 1.7, "grad_norm": 1.693461277845761, "learning_rate": 4.944658862876255e-06, "loss": 0.1978, "step": 43150 }, { "epoch": 1.7, "grad_norm": 1.453539084307599, "learning_rate": 4.944617056856188e-06, "loss": 0.2291, "step": 43175 }, { "epoch": 1.7, "grad_norm": 1.7794757369908873, "learning_rate": 4.9445752508361205e-06, "loss": 0.2148, "step": 43200 }, { "epoch": 1.7, "grad_norm": 1.7997181495063956, "learning_rate": 4.944533444816054e-06, "loss": 0.1912, "step": 43225 }, { "epoch": 1.7, "grad_norm": 2.103310345158487, "learning_rate": 4.94449331103679e-06, "loss": 0.2029, "step": 43250 }, { "epoch": 1.7, "grad_norm": 1.3283148483923, "learning_rate": 4.944451505016723e-06, "loss": 0.2016, "step": 43275 }, { "epoch": 1.7, "grad_norm": 1.5231191363195111, "learning_rate": 4.944409698996656e-06, "loss": 0.194, "step": 43300 }, { "epoch": 1.7, "grad_norm": 1.727077984512469, "learning_rate": 4.94436789297659e-06, "loss": 0.2031, "step": 43325 }, { "epoch": 1.71, "grad_norm": 1.453989318654602, "learning_rate": 4.944326086956522e-06, "loss": 0.2083, "step": 43350 }, { "epoch": 1.71, "grad_norm": 1.5469548856275384, "learning_rate": 4.944284280936456e-06, "loss": 0.2176, "step": 43375 }, { "epoch": 1.71, "grad_norm": 3.160825934896527, "learning_rate": 4.944242474916388e-06, "loss": 0.2022, "step": 43400 }, { "epoch": 1.71, "grad_norm": 1.5719087440594064, "learning_rate": 4.944200668896321e-06, "loss": 0.2125, "step": 43425 }, { "epoch": 1.71, "grad_norm": 1.3656354570889466, "learning_rate": 4.944158862876254e-06, "loss": 0.2117, "step": 43450 }, { "epoch": 1.71, "grad_norm": 1.5367941234175355, "learning_rate": 4.9441170568561875e-06, "loss": 0.2176, "step": 43475 }, { "epoch": 1.71, "grad_norm": 2.4417364583297174, "learning_rate": 4.94407525083612e-06, "loss": 0.1879, "step": 43500 }, { "epoch": 1.71, "grad_norm": 1.2630478840085972, "learning_rate": 4.944033444816054e-06, "loss": 0.2, "step": 43525 }, { "epoch": 1.71, "grad_norm": 1.112072451582671, "learning_rate": 4.9439916387959865e-06, "loss": 0.1968, "step": 43550 }, { "epoch": 1.71, "grad_norm": 1.7713642185306702, "learning_rate": 4.94394983277592e-06, "loss": 0.2267, "step": 43575 }, { "epoch": 1.72, "grad_norm": 1.2056972754693054, "learning_rate": 4.943908026755853e-06, "loss": 0.2014, "step": 43600 }, { "epoch": 1.72, "grad_norm": 2.092655557657814, "learning_rate": 4.943866220735786e-06, "loss": 0.2009, "step": 43625 }, { "epoch": 1.72, "grad_norm": 1.7225505908943808, "learning_rate": 4.94382441471572e-06, "loss": 0.2268, "step": 43650 }, { "epoch": 1.72, "grad_norm": 1.5368215742705937, "learning_rate": 4.9437826086956525e-06, "loss": 0.2055, "step": 43675 }, { "epoch": 1.72, "grad_norm": 1.3197247012408562, "learning_rate": 4.943740802675586e-06, "loss": 0.2343, "step": 43700 }, { "epoch": 1.72, "grad_norm": 1.3772135831744492, "learning_rate": 4.943698996655519e-06, "loss": 0.1976, "step": 43725 }, { "epoch": 1.72, "grad_norm": 1.4326058066713432, "learning_rate": 4.943657190635452e-06, "loss": 0.2031, "step": 43750 }, { "epoch": 1.72, "grad_norm": 1.5888807750342064, "learning_rate": 4.943615384615385e-06, "loss": 0.2074, "step": 43775 }, { "epoch": 1.72, "grad_norm": 1.5396007104076448, "learning_rate": 4.9435735785953185e-06, "loss": 0.2067, "step": 43800 }, { "epoch": 1.72, "grad_norm": 2.4011127551195726, "learning_rate": 4.943531772575251e-06, "loss": 0.1787, "step": 43825 }, { "epoch": 1.73, "grad_norm": 0.833780754470428, "learning_rate": 4.943489966555185e-06, "loss": 0.1943, "step": 43850 }, { "epoch": 1.73, "grad_norm": 1.4268589028719112, "learning_rate": 4.9434481605351175e-06, "loss": 0.184, "step": 43875 }, { "epoch": 1.73, "grad_norm": 2.1879663087707617, "learning_rate": 4.943406354515051e-06, "loss": 0.2, "step": 43900 }, { "epoch": 1.73, "grad_norm": 1.4837138515357116, "learning_rate": 4.943364548494984e-06, "loss": 0.1938, "step": 43925 }, { "epoch": 1.73, "grad_norm": 1.302090890735732, "learning_rate": 4.943322742474917e-06, "loss": 0.2179, "step": 43950 }, { "epoch": 1.73, "grad_norm": 1.861136682417232, "learning_rate": 4.94328093645485e-06, "loss": 0.2182, "step": 43975 }, { "epoch": 1.73, "grad_norm": 2.335286311587073, "learning_rate": 4.9432391304347835e-06, "loss": 0.2214, "step": 44000 }, { "epoch": 1.73, "grad_norm": 1.6769155432405607, "learning_rate": 4.943197324414716e-06, "loss": 0.2042, "step": 44025 }, { "epoch": 1.73, "grad_norm": 2.200106902410679, "learning_rate": 4.94315551839465e-06, "loss": 0.1987, "step": 44050 }, { "epoch": 1.73, "grad_norm": 1.7563502007021174, "learning_rate": 4.9431137123745825e-06, "loss": 0.1951, "step": 44075 }, { "epoch": 1.73, "grad_norm": 1.5530867625540488, "learning_rate": 4.943071906354516e-06, "loss": 0.2139, "step": 44100 }, { "epoch": 1.74, "grad_norm": 1.3534943489075693, "learning_rate": 4.943030100334449e-06, "loss": 0.1773, "step": 44125 }, { "epoch": 1.74, "grad_norm": 1.4957898910507397, "learning_rate": 4.942988294314381e-06, "loss": 0.228, "step": 44150 }, { "epoch": 1.74, "grad_norm": 2.8785662557550045, "learning_rate": 4.942946488294315e-06, "loss": 0.2081, "step": 44175 }, { "epoch": 1.74, "grad_norm": 1.712164070763034, "learning_rate": 4.942904682274248e-06, "loss": 0.2042, "step": 44200 }, { "epoch": 1.74, "grad_norm": 1.52945812067655, "learning_rate": 4.942862876254181e-06, "loss": 0.2277, "step": 44225 }, { "epoch": 1.74, "grad_norm": 1.3722433996508723, "learning_rate": 4.942822742474917e-06, "loss": 0.2299, "step": 44250 }, { "epoch": 1.74, "grad_norm": 1.5393189925564164, "learning_rate": 4.94278093645485e-06, "loss": 0.2175, "step": 44275 }, { "epoch": 1.74, "grad_norm": 1.5822179394649376, "learning_rate": 4.942739130434783e-06, "loss": 0.2038, "step": 44300 }, { "epoch": 1.74, "grad_norm": 2.3792366307810666, "learning_rate": 4.942697324414717e-06, "loss": 0.2047, "step": 44325 }, { "epoch": 1.74, "grad_norm": 1.70816553270609, "learning_rate": 4.942655518394649e-06, "loss": 0.2217, "step": 44350 }, { "epoch": 1.75, "grad_norm": 1.2792536160078478, "learning_rate": 4.942613712374582e-06, "loss": 0.2131, "step": 44375 }, { "epoch": 1.75, "grad_norm": 1.4575673021789084, "learning_rate": 4.942571906354515e-06, "loss": 0.2131, "step": 44400 }, { "epoch": 1.75, "grad_norm": 1.8400518563065749, "learning_rate": 4.9425301003344485e-06, "loss": 0.2037, "step": 44425 }, { "epoch": 1.75, "grad_norm": 1.5635883512841116, "learning_rate": 4.942488294314381e-06, "loss": 0.2091, "step": 44450 }, { "epoch": 1.75, "grad_norm": 2.0007327369281773, "learning_rate": 4.942446488294315e-06, "loss": 0.2127, "step": 44475 }, { "epoch": 1.75, "grad_norm": 2.119742302055427, "learning_rate": 4.942404682274247e-06, "loss": 0.2012, "step": 44500 }, { "epoch": 1.75, "grad_norm": 1.9344147417555144, "learning_rate": 4.942362876254181e-06, "loss": 0.2013, "step": 44525 }, { "epoch": 1.75, "grad_norm": 1.328539603876774, "learning_rate": 4.942321070234114e-06, "loss": 0.1986, "step": 44550 }, { "epoch": 1.75, "grad_norm": 1.7040707858619548, "learning_rate": 4.942279264214047e-06, "loss": 0.1954, "step": 44575 }, { "epoch": 1.75, "grad_norm": 2.4008583236352887, "learning_rate": 4.94223745819398e-06, "loss": 0.2226, "step": 44600 }, { "epoch": 1.76, "grad_norm": 1.148723618960234, "learning_rate": 4.9421956521739134e-06, "loss": 0.2067, "step": 44625 }, { "epoch": 1.76, "grad_norm": 1.4833158950358802, "learning_rate": 4.942153846153846e-06, "loss": 0.2012, "step": 44650 }, { "epoch": 1.76, "grad_norm": 1.8801569799377404, "learning_rate": 4.94211204013378e-06, "loss": 0.204, "step": 44675 }, { "epoch": 1.76, "grad_norm": 1.6972529337434588, "learning_rate": 4.942070234113712e-06, "loss": 0.2183, "step": 44700 }, { "epoch": 1.76, "grad_norm": 1.7424024925861386, "learning_rate": 4.942028428093646e-06, "loss": 0.196, "step": 44725 }, { "epoch": 1.76, "grad_norm": 1.419744820585979, "learning_rate": 4.9419866220735795e-06, "loss": 0.2205, "step": 44750 }, { "epoch": 1.76, "grad_norm": 1.773705854022055, "learning_rate": 4.941944816053512e-06, "loss": 0.1951, "step": 44775 }, { "epoch": 1.76, "grad_norm": 1.6575512331545263, "learning_rate": 4.941903010033446e-06, "loss": 0.1998, "step": 44800 }, { "epoch": 1.76, "grad_norm": 0.8632058992042098, "learning_rate": 4.941861204013378e-06, "loss": 0.2272, "step": 44825 }, { "epoch": 1.76, "grad_norm": 1.8833068240202187, "learning_rate": 4.941819397993312e-06, "loss": 0.23, "step": 44850 }, { "epoch": 1.77, "grad_norm": 1.5386804945912553, "learning_rate": 4.941777591973245e-06, "loss": 0.2026, "step": 44875 }, { "epoch": 1.77, "grad_norm": 1.1855200203299385, "learning_rate": 4.941735785953178e-06, "loss": 0.2161, "step": 44900 }, { "epoch": 1.77, "grad_norm": 1.9564640886071363, "learning_rate": 4.941693979933111e-06, "loss": 0.1789, "step": 44925 }, { "epoch": 1.77, "grad_norm": 2.340358116387867, "learning_rate": 4.9416521739130444e-06, "loss": 0.1937, "step": 44950 }, { "epoch": 1.77, "grad_norm": 2.1872850428854087, "learning_rate": 4.941610367892977e-06, "loss": 0.2088, "step": 44975 }, { "epoch": 1.77, "grad_norm": 1.3941237415424022, "learning_rate": 4.941568561872911e-06, "loss": 0.215, "step": 45000 }, { "epoch": 1.77, "grad_norm": 1.2422150166796995, "learning_rate": 4.941526755852843e-06, "loss": 0.1978, "step": 45025 }, { "epoch": 1.77, "grad_norm": 1.2834289450493046, "learning_rate": 4.941484949832777e-06, "loss": 0.2019, "step": 45050 }, { "epoch": 1.77, "grad_norm": 1.3887225318643923, "learning_rate": 4.941443143812709e-06, "loss": 0.2005, "step": 45075 }, { "epoch": 1.77, "grad_norm": 1.5957356966876162, "learning_rate": 4.941401337792642e-06, "loss": 0.2116, "step": 45100 }, { "epoch": 1.78, "grad_norm": 1.7998947945044745, "learning_rate": 4.941359531772575e-06, "loss": 0.2113, "step": 45125 }, { "epoch": 1.78, "grad_norm": 1.2863221238150597, "learning_rate": 4.9413177257525086e-06, "loss": 0.1909, "step": 45150 }, { "epoch": 1.78, "grad_norm": 1.291509095965881, "learning_rate": 4.941275919732442e-06, "loss": 0.1993, "step": 45175 }, { "epoch": 1.78, "grad_norm": 2.1958530909710876, "learning_rate": 4.941234113712375e-06, "loss": 0.2115, "step": 45200 }, { "epoch": 1.78, "grad_norm": 1.5439304960130933, "learning_rate": 4.941192307692308e-06, "loss": 0.1959, "step": 45225 }, { "epoch": 1.78, "grad_norm": 1.9055756132780866, "learning_rate": 4.941152173913044e-06, "loss": 0.2022, "step": 45250 }, { "epoch": 1.78, "grad_norm": 1.2942694636126806, "learning_rate": 4.941110367892977e-06, "loss": 0.1945, "step": 45275 }, { "epoch": 1.78, "grad_norm": 1.6378120736690176, "learning_rate": 4.94106856187291e-06, "loss": 0.2001, "step": 45300 }, { "epoch": 1.78, "grad_norm": 1.7309152118782125, "learning_rate": 4.941026755852843e-06, "loss": 0.189, "step": 45325 }, { "epoch": 1.78, "grad_norm": 1.5926592940812094, "learning_rate": 4.940984949832776e-06, "loss": 0.1886, "step": 45350 }, { "epoch": 1.79, "grad_norm": 1.4814435621208106, "learning_rate": 4.940943143812709e-06, "loss": 0.2175, "step": 45375 }, { "epoch": 1.79, "grad_norm": 2.3340113593410514, "learning_rate": 4.940901337792642e-06, "loss": 0.2064, "step": 45400 }, { "epoch": 1.79, "grad_norm": 1.7179296631796945, "learning_rate": 4.940859531772576e-06, "loss": 0.2027, "step": 45425 }, { "epoch": 1.79, "grad_norm": 1.3525854738210719, "learning_rate": 4.940817725752508e-06, "loss": 0.2069, "step": 45450 }, { "epoch": 1.79, "grad_norm": 1.1243049994361427, "learning_rate": 4.940775919732442e-06, "loss": 0.1942, "step": 45475 }, { "epoch": 1.79, "grad_norm": 1.2883069022850577, "learning_rate": 4.9407341137123746e-06, "loss": 0.1776, "step": 45500 }, { "epoch": 1.79, "grad_norm": 1.3987378560793549, "learning_rate": 4.940692307692308e-06, "loss": 0.2085, "step": 45525 }, { "epoch": 1.79, "grad_norm": 1.8141337973143972, "learning_rate": 4.940650501672241e-06, "loss": 0.2077, "step": 45550 }, { "epoch": 1.79, "grad_norm": 2.1709295579225634, "learning_rate": 4.940608695652174e-06, "loss": 0.1949, "step": 45575 }, { "epoch": 1.79, "grad_norm": 1.5535520468286816, "learning_rate": 4.940566889632107e-06, "loss": 0.1988, "step": 45600 }, { "epoch": 1.79, "grad_norm": 1.9291991595641431, "learning_rate": 4.940525083612041e-06, "loss": 0.2142, "step": 45625 }, { "epoch": 1.8, "grad_norm": 1.944908850889749, "learning_rate": 4.940483277591973e-06, "loss": 0.2189, "step": 45650 }, { "epoch": 1.8, "grad_norm": 1.8924849523796625, "learning_rate": 4.940441471571907e-06, "loss": 0.2042, "step": 45675 }, { "epoch": 1.8, "grad_norm": 1.0381586631748612, "learning_rate": 4.9403996655518395e-06, "loss": 0.1973, "step": 45700 }, { "epoch": 1.8, "grad_norm": 1.596229546211035, "learning_rate": 4.940357859531773e-06, "loss": 0.1971, "step": 45725 }, { "epoch": 1.8, "grad_norm": 1.478086237165827, "learning_rate": 4.940316053511706e-06, "loss": 0.2172, "step": 45750 }, { "epoch": 1.8, "grad_norm": 1.559672032174473, "learning_rate": 4.940274247491639e-06, "loss": 0.2215, "step": 45775 }, { "epoch": 1.8, "grad_norm": 1.2609063742885178, "learning_rate": 4.940232441471572e-06, "loss": 0.1815, "step": 45800 }, { "epoch": 1.8, "grad_norm": 1.2924032797525853, "learning_rate": 4.9401906354515056e-06, "loss": 0.2064, "step": 45825 }, { "epoch": 1.8, "grad_norm": 2.4174361862563374, "learning_rate": 4.940148829431439e-06, "loss": 0.217, "step": 45850 }, { "epoch": 1.8, "grad_norm": 1.373477111385322, "learning_rate": 4.940107023411372e-06, "loss": 0.1953, "step": 45875 }, { "epoch": 1.81, "grad_norm": 1.8558451130861622, "learning_rate": 4.940065217391305e-06, "loss": 0.2054, "step": 45900 }, { "epoch": 1.81, "grad_norm": 1.6990765889691408, "learning_rate": 4.940023411371238e-06, "loss": 0.1899, "step": 45925 }, { "epoch": 1.81, "grad_norm": 1.8060030463753445, "learning_rate": 4.939981605351172e-06, "loss": 0.2202, "step": 45950 }, { "epoch": 1.81, "grad_norm": 1.0036006697315363, "learning_rate": 4.939939799331104e-06, "loss": 0.1892, "step": 45975 }, { "epoch": 1.81, "grad_norm": 1.5889893762168747, "learning_rate": 4.939897993311037e-06, "loss": 0.2014, "step": 46000 }, { "epoch": 1.81, "grad_norm": 1.2885407324111298, "learning_rate": 4.93985618729097e-06, "loss": 0.2022, "step": 46025 }, { "epoch": 1.81, "grad_norm": 1.4911043166689495, "learning_rate": 4.939814381270903e-06, "loss": 0.219, "step": 46050 }, { "epoch": 1.81, "grad_norm": 1.2755317856393762, "learning_rate": 4.939772575250836e-06, "loss": 0.2054, "step": 46075 }, { "epoch": 1.81, "grad_norm": 1.994187232620831, "learning_rate": 4.9397307692307695e-06, "loss": 0.1891, "step": 46100 }, { "epoch": 1.81, "grad_norm": 1.5475701584616484, "learning_rate": 4.939688963210702e-06, "loss": 0.2022, "step": 46125 }, { "epoch": 1.82, "grad_norm": 1.4436373558259408, "learning_rate": 4.939647157190636e-06, "loss": 0.197, "step": 46150 }, { "epoch": 1.82, "grad_norm": 1.7292220715855722, "learning_rate": 4.9396053511705684e-06, "loss": 0.2127, "step": 46175 }, { "epoch": 1.82, "grad_norm": 2.00863798888671, "learning_rate": 4.939563545150502e-06, "loss": 0.1874, "step": 46200 }, { "epoch": 1.82, "grad_norm": 1.575347210475048, "learning_rate": 4.939521739130435e-06, "loss": 0.2142, "step": 46225 }, { "epoch": 1.82, "grad_norm": 1.8865538136561686, "learning_rate": 4.9394816053511705e-06, "loss": 0.194, "step": 46250 }, { "epoch": 1.82, "grad_norm": 1.3867064113440684, "learning_rate": 4.939439799331104e-06, "loss": 0.2238, "step": 46275 }, { "epoch": 1.82, "grad_norm": 1.6636489722849195, "learning_rate": 4.939397993311037e-06, "loss": 0.2124, "step": 46300 }, { "epoch": 1.82, "grad_norm": 1.796035846087043, "learning_rate": 4.93935618729097e-06, "loss": 0.2086, "step": 46325 }, { "epoch": 1.82, "grad_norm": 1.6152861037902768, "learning_rate": 4.939314381270903e-06, "loss": 0.2194, "step": 46350 }, { "epoch": 1.82, "grad_norm": 0.872021389069065, "learning_rate": 4.9392725752508365e-06, "loss": 0.1983, "step": 46375 }, { "epoch": 1.83, "grad_norm": 1.3627173247663622, "learning_rate": 4.939230769230769e-06, "loss": 0.2228, "step": 46400 }, { "epoch": 1.83, "grad_norm": 2.314259549099962, "learning_rate": 4.939188963210703e-06, "loss": 0.1979, "step": 46425 }, { "epoch": 1.83, "grad_norm": 1.6032460211094444, "learning_rate": 4.9391471571906355e-06, "loss": 0.2278, "step": 46450 }, { "epoch": 1.83, "grad_norm": 1.775553525039903, "learning_rate": 4.939105351170569e-06, "loss": 0.2044, "step": 46475 }, { "epoch": 1.83, "grad_norm": 1.800363727354035, "learning_rate": 4.939063545150502e-06, "loss": 0.2143, "step": 46500 }, { "epoch": 1.83, "grad_norm": 1.2465956627042085, "learning_rate": 4.939021739130435e-06, "loss": 0.2043, "step": 46525 }, { "epoch": 1.83, "grad_norm": 1.8944500939180655, "learning_rate": 4.938979933110368e-06, "loss": 0.2063, "step": 46550 }, { "epoch": 1.83, "grad_norm": 1.5117363359679743, "learning_rate": 4.9389381270903015e-06, "loss": 0.2122, "step": 46575 }, { "epoch": 1.83, "grad_norm": 1.5887958169703909, "learning_rate": 4.938896321070234e-06, "loss": 0.221, "step": 46600 }, { "epoch": 1.83, "grad_norm": 1.648221177617564, "learning_rate": 4.938854515050168e-06, "loss": 0.1771, "step": 46625 }, { "epoch": 1.84, "grad_norm": 1.55324428745445, "learning_rate": 4.9388127090301005e-06, "loss": 0.1883, "step": 46650 }, { "epoch": 1.84, "grad_norm": 1.7723178618451372, "learning_rate": 4.938770903010034e-06, "loss": 0.1927, "step": 46675 }, { "epoch": 1.84, "grad_norm": 2.153448369196902, "learning_rate": 4.938729096989967e-06, "loss": 0.2044, "step": 46700 }, { "epoch": 1.84, "grad_norm": 2.1268320701701797, "learning_rate": 4.9386872909699e-06, "loss": 0.2428, "step": 46725 }, { "epoch": 1.84, "grad_norm": 1.3631634906977346, "learning_rate": 4.938645484949833e-06, "loss": 0.1857, "step": 46750 }, { "epoch": 1.84, "grad_norm": 1.6168270762112813, "learning_rate": 4.9386036789297665e-06, "loss": 0.2011, "step": 46775 }, { "epoch": 1.84, "grad_norm": 1.6696110343047001, "learning_rate": 4.938561872909699e-06, "loss": 0.2204, "step": 46800 }, { "epoch": 1.84, "grad_norm": 1.192494551736515, "learning_rate": 4.938520066889633e-06, "loss": 0.2038, "step": 46825 }, { "epoch": 1.84, "grad_norm": 1.7077939692501043, "learning_rate": 4.9384782608695654e-06, "loss": 0.1955, "step": 46850 }, { "epoch": 1.84, "grad_norm": 1.8191368453499213, "learning_rate": 4.938436454849499e-06, "loss": 0.208, "step": 46875 }, { "epoch": 1.85, "grad_norm": 1.5432423506062156, "learning_rate": 4.938394648829432e-06, "loss": 0.2058, "step": 46900 }, { "epoch": 1.85, "grad_norm": 1.9468989423937535, "learning_rate": 4.938352842809365e-06, "loss": 0.1768, "step": 46925 }, { "epoch": 1.85, "grad_norm": 1.7161970238767805, "learning_rate": 4.938311036789298e-06, "loss": 0.2131, "step": 46950 }, { "epoch": 1.85, "grad_norm": 1.5485071588534483, "learning_rate": 4.938269230769231e-06, "loss": 0.2218, "step": 46975 }, { "epoch": 1.85, "grad_norm": 2.53263925456021, "learning_rate": 4.938227424749164e-06, "loss": 0.2235, "step": 47000 }, { "epoch": 1.85, "grad_norm": 1.8192841664528814, "learning_rate": 4.938185618729097e-06, "loss": 0.1992, "step": 47025 }, { "epoch": 1.85, "grad_norm": 2.115023350287966, "learning_rate": 4.93814381270903e-06, "loss": 0.2213, "step": 47050 }, { "epoch": 1.85, "grad_norm": 1.5380163293793399, "learning_rate": 4.938102006688963e-06, "loss": 0.1992, "step": 47075 }, { "epoch": 1.85, "grad_norm": 1.4449128177392452, "learning_rate": 4.938060200668897e-06, "loss": 0.2181, "step": 47100 }, { "epoch": 1.85, "grad_norm": 1.8029578697927904, "learning_rate": 4.938018394648829e-06, "loss": 0.1921, "step": 47125 }, { "epoch": 1.85, "grad_norm": 2.0395632871911147, "learning_rate": 4.937976588628763e-06, "loss": 0.1895, "step": 47150 }, { "epoch": 1.86, "grad_norm": 1.3878968532309697, "learning_rate": 4.937934782608696e-06, "loss": 0.1842, "step": 47175 }, { "epoch": 1.86, "grad_norm": 1.3434122603027006, "learning_rate": 4.937892976588629e-06, "loss": 0.2056, "step": 47200 }, { "epoch": 1.86, "grad_norm": 1.3490671768973181, "learning_rate": 4.937851170568562e-06, "loss": 0.1866, "step": 47225 }, { "epoch": 1.86, "grad_norm": 1.613883740225785, "learning_rate": 4.937811036789298e-06, "loss": 0.1979, "step": 47250 }, { "epoch": 1.86, "grad_norm": 1.7926615672094046, "learning_rate": 4.937769230769231e-06, "loss": 0.1977, "step": 47275 }, { "epoch": 1.86, "grad_norm": 2.484911009082714, "learning_rate": 4.937727424749164e-06, "loss": 0.2022, "step": 47300 }, { "epoch": 1.86, "grad_norm": 1.9218949249621484, "learning_rate": 4.9376856187290975e-06, "loss": 0.2221, "step": 47325 }, { "epoch": 1.86, "grad_norm": 1.4316456230383503, "learning_rate": 4.937645484949833e-06, "loss": 0.1902, "step": 47350 }, { "epoch": 1.86, "grad_norm": 1.7052129122552293, "learning_rate": 4.937603678929767e-06, "loss": 0.2221, "step": 47375 }, { "epoch": 1.86, "grad_norm": 1.2637211421190688, "learning_rate": 4.9375618729096995e-06, "loss": 0.2058, "step": 47400 }, { "epoch": 1.87, "grad_norm": 1.8451450491794519, "learning_rate": 4.937520066889632e-06, "loss": 0.1993, "step": 47425 }, { "epoch": 1.87, "grad_norm": 0.9283921636952341, "learning_rate": 4.937478260869566e-06, "loss": 0.2055, "step": 47450 }, { "epoch": 1.87, "grad_norm": 1.8414663077620397, "learning_rate": 4.9374364548494985e-06, "loss": 0.2203, "step": 47475 }, { "epoch": 1.87, "grad_norm": 2.0753580914227365, "learning_rate": 4.937394648829432e-06, "loss": 0.213, "step": 47500 }, { "epoch": 1.87, "grad_norm": 1.2078240932003506, "learning_rate": 4.937352842809365e-06, "loss": 0.2136, "step": 47525 }, { "epoch": 1.87, "grad_norm": 1.7304545694227498, "learning_rate": 4.937311036789298e-06, "loss": 0.215, "step": 47550 }, { "epoch": 1.87, "grad_norm": 1.6773895264603955, "learning_rate": 4.937269230769231e-06, "loss": 0.184, "step": 47575 }, { "epoch": 1.87, "grad_norm": 1.6133216723159176, "learning_rate": 4.9372274247491645e-06, "loss": 0.2233, "step": 47600 }, { "epoch": 1.87, "grad_norm": 2.3083125789789483, "learning_rate": 4.937185618729097e-06, "loss": 0.2224, "step": 47625 }, { "epoch": 1.87, "grad_norm": 1.6499407481703168, "learning_rate": 4.937143812709031e-06, "loss": 0.2013, "step": 47650 }, { "epoch": 1.88, "grad_norm": 1.9096100207893898, "learning_rate": 4.9371020066889635e-06, "loss": 0.1966, "step": 47675 }, { "epoch": 1.88, "grad_norm": 2.0102219012043174, "learning_rate": 4.937060200668897e-06, "loss": 0.2247, "step": 47700 }, { "epoch": 1.88, "grad_norm": 1.8440109791151151, "learning_rate": 4.93701839464883e-06, "loss": 0.1832, "step": 47725 }, { "epoch": 1.88, "grad_norm": 1.6405657863329357, "learning_rate": 4.936976588628763e-06, "loss": 0.2021, "step": 47750 }, { "epoch": 1.88, "grad_norm": 1.5352039277718714, "learning_rate": 4.936934782608696e-06, "loss": 0.2106, "step": 47775 }, { "epoch": 1.88, "grad_norm": 1.218168105886781, "learning_rate": 4.9368929765886295e-06, "loss": 0.2028, "step": 47800 }, { "epoch": 1.88, "grad_norm": 0.9303573799655196, "learning_rate": 4.936851170568562e-06, "loss": 0.208, "step": 47825 }, { "epoch": 1.88, "grad_norm": 1.3720528426394387, "learning_rate": 4.936809364548496e-06, "loss": 0.213, "step": 47850 }, { "epoch": 1.88, "grad_norm": 1.330231745796533, "learning_rate": 4.9367675585284284e-06, "loss": 0.194, "step": 47875 }, { "epoch": 1.88, "grad_norm": 1.6102889821188229, "learning_rate": 4.936725752508362e-06, "loss": 0.2059, "step": 47900 }, { "epoch": 1.89, "grad_norm": 1.7900179616799938, "learning_rate": 4.936683946488295e-06, "loss": 0.2099, "step": 47925 }, { "epoch": 1.89, "grad_norm": 1.876167524982635, "learning_rate": 4.936642140468228e-06, "loss": 0.2108, "step": 47950 }, { "epoch": 1.89, "grad_norm": 1.3715764517785491, "learning_rate": 4.936600334448161e-06, "loss": 0.2118, "step": 47975 }, { "epoch": 1.89, "grad_norm": 1.242091850621187, "learning_rate": 4.9365585284280945e-06, "loss": 0.2091, "step": 48000 }, { "epoch": 1.89, "grad_norm": 2.381253912271751, "learning_rate": 4.936516722408027e-06, "loss": 0.1934, "step": 48025 }, { "epoch": 1.89, "grad_norm": 1.412172971145708, "learning_rate": 4.936474916387961e-06, "loss": 0.2054, "step": 48050 }, { "epoch": 1.89, "grad_norm": 1.5116219256655863, "learning_rate": 4.936433110367893e-06, "loss": 0.2155, "step": 48075 }, { "epoch": 1.89, "grad_norm": 1.2887142644122933, "learning_rate": 4.936391304347827e-06, "loss": 0.2091, "step": 48100 }, { "epoch": 1.89, "grad_norm": 1.6820263408725942, "learning_rate": 4.93634949832776e-06, "loss": 0.1912, "step": 48125 }, { "epoch": 1.89, "grad_norm": 1.5815843596491972, "learning_rate": 4.936307692307692e-06, "loss": 0.2055, "step": 48150 }, { "epoch": 1.9, "grad_norm": 1.6081837772093868, "learning_rate": 4.936265886287626e-06, "loss": 0.2117, "step": 48175 }, { "epoch": 1.9, "grad_norm": 1.4467885553172026, "learning_rate": 4.936224080267559e-06, "loss": 0.216, "step": 48200 }, { "epoch": 1.9, "grad_norm": 1.1519428579352176, "learning_rate": 4.936182274247492e-06, "loss": 0.2017, "step": 48225 }, { "epoch": 1.9, "grad_norm": 1.5300606843300384, "learning_rate": 4.936140468227425e-06, "loss": 0.205, "step": 48250 }, { "epoch": 1.9, "grad_norm": 1.720404121032897, "learning_rate": 4.936098662207358e-06, "loss": 0.2007, "step": 48275 }, { "epoch": 1.9, "grad_norm": 1.6425840844489246, "learning_rate": 4.936056856187291e-06, "loss": 0.1853, "step": 48300 }, { "epoch": 1.9, "grad_norm": 1.459659103020749, "learning_rate": 4.936015050167225e-06, "loss": 0.2116, "step": 48325 }, { "epoch": 1.9, "grad_norm": 1.3712585119025817, "learning_rate": 4.935973244147157e-06, "loss": 0.2106, "step": 48350 }, { "epoch": 1.9, "grad_norm": 2.2759914401336694, "learning_rate": 4.935931438127091e-06, "loss": 0.2099, "step": 48375 }, { "epoch": 1.9, "grad_norm": 1.1003315934242315, "learning_rate": 4.935889632107024e-06, "loss": 0.2006, "step": 48400 }, { "epoch": 1.91, "grad_norm": 1.1531531222871791, "learning_rate": 4.935847826086957e-06, "loss": 0.1993, "step": 48425 }, { "epoch": 1.91, "grad_norm": 1.9522070688750959, "learning_rate": 4.93580602006689e-06, "loss": 0.2029, "step": 48450 }, { "epoch": 1.91, "grad_norm": 0.9647196482698353, "learning_rate": 4.935764214046823e-06, "loss": 0.1777, "step": 48475 }, { "epoch": 1.91, "grad_norm": 2.357874217165264, "learning_rate": 4.935722408026756e-06, "loss": 0.202, "step": 48500 }, { "epoch": 1.91, "grad_norm": 1.706086540465037, "learning_rate": 4.93568060200669e-06, "loss": 0.2099, "step": 48525 }, { "epoch": 1.91, "grad_norm": 1.368230675138682, "learning_rate": 4.935638795986622e-06, "loss": 0.1893, "step": 48550 }, { "epoch": 1.91, "grad_norm": 2.1989144444420323, "learning_rate": 4.935596989966556e-06, "loss": 0.234, "step": 48575 }, { "epoch": 1.91, "grad_norm": 1.3439068076959173, "learning_rate": 4.9355551839464886e-06, "loss": 0.1919, "step": 48600 }, { "epoch": 1.91, "grad_norm": 1.5347802892044207, "learning_rate": 4.935513377926422e-06, "loss": 0.2087, "step": 48625 }, { "epoch": 1.91, "grad_norm": 1.3853060269687627, "learning_rate": 4.935471571906355e-06, "loss": 0.2153, "step": 48650 }, { "epoch": 1.91, "grad_norm": 1.2705654843241438, "learning_rate": 4.935429765886288e-06, "loss": 0.1942, "step": 48675 }, { "epoch": 1.92, "grad_norm": 1.507043182236023, "learning_rate": 4.935387959866221e-06, "loss": 0.2052, "step": 48700 }, { "epoch": 1.92, "grad_norm": 0.8852640991515132, "learning_rate": 4.935346153846155e-06, "loss": 0.2126, "step": 48725 }, { "epoch": 1.92, "grad_norm": 1.2880272534231267, "learning_rate": 4.935304347826087e-06, "loss": 0.1823, "step": 48750 }, { "epoch": 1.92, "grad_norm": 1.3419848634582663, "learning_rate": 4.935262541806021e-06, "loss": 0.2085, "step": 48775 }, { "epoch": 1.92, "grad_norm": 1.2454273911509375, "learning_rate": 4.9352207357859535e-06, "loss": 0.198, "step": 48800 }, { "epoch": 1.92, "grad_norm": 1.1964741196519708, "learning_rate": 4.935178929765886e-06, "loss": 0.194, "step": 48825 }, { "epoch": 1.92, "grad_norm": 1.482373609512503, "learning_rate": 4.93513712374582e-06, "loss": 0.1923, "step": 48850 }, { "epoch": 1.92, "grad_norm": 1.4343822110286697, "learning_rate": 4.9350953177257525e-06, "loss": 0.201, "step": 48875 }, { "epoch": 1.92, "grad_norm": 1.9110162286931605, "learning_rate": 4.935053511705686e-06, "loss": 0.2177, "step": 48900 }, { "epoch": 1.92, "grad_norm": 1.4055468589424738, "learning_rate": 4.935011705685619e-06, "loss": 0.1975, "step": 48925 }, { "epoch": 1.93, "grad_norm": 1.1095321871083568, "learning_rate": 4.934969899665552e-06, "loss": 0.2038, "step": 48950 }, { "epoch": 1.93, "grad_norm": 1.6489177988410992, "learning_rate": 4.934928093645485e-06, "loss": 0.2045, "step": 48975 }, { "epoch": 1.93, "grad_norm": 1.0969848442231984, "learning_rate": 4.9348862876254185e-06, "loss": 0.1798, "step": 49000 }, { "epoch": 1.93, "grad_norm": 2.431163706761367, "learning_rate": 4.934844481605351e-06, "loss": 0.2148, "step": 49025 }, { "epoch": 1.93, "grad_norm": 1.1269539448904287, "learning_rate": 4.934802675585285e-06, "loss": 0.1913, "step": 49050 }, { "epoch": 1.93, "grad_norm": 2.009600750564663, "learning_rate": 4.9347608695652175e-06, "loss": 0.1902, "step": 49075 }, { "epoch": 1.93, "grad_norm": 1.6341443792861832, "learning_rate": 4.934719063545151e-06, "loss": 0.2069, "step": 49100 }, { "epoch": 1.93, "grad_norm": 1.4131602535107552, "learning_rate": 4.934677257525084e-06, "loss": 0.1899, "step": 49125 }, { "epoch": 1.93, "grad_norm": 0.8377130239703243, "learning_rate": 4.934635451505017e-06, "loss": 0.2038, "step": 49150 }, { "epoch": 1.93, "grad_norm": 1.9368805923358994, "learning_rate": 4.93459364548495e-06, "loss": 0.2089, "step": 49175 }, { "epoch": 1.94, "grad_norm": 1.6406415730986539, "learning_rate": 4.9345518394648835e-06, "loss": 0.2023, "step": 49200 }, { "epoch": 1.94, "grad_norm": 1.5142583337522033, "learning_rate": 4.934510033444816e-06, "loss": 0.2124, "step": 49225 }, { "epoch": 1.94, "grad_norm": 1.4175403686258068, "learning_rate": 4.93446822742475e-06, "loss": 0.1889, "step": 49250 }, { "epoch": 1.94, "grad_norm": 1.680924590381479, "learning_rate": 4.9344264214046824e-06, "loss": 0.18, "step": 49275 }, { "epoch": 1.94, "grad_norm": 2.4182165052837803, "learning_rate": 4.934384615384616e-06, "loss": 0.199, "step": 49300 }, { "epoch": 1.94, "grad_norm": 1.5324006988373318, "learning_rate": 4.934342809364549e-06, "loss": 0.2107, "step": 49325 }, { "epoch": 1.94, "grad_norm": 1.8984134230654337, "learning_rate": 4.9343026755852845e-06, "loss": 0.2195, "step": 49350 }, { "epoch": 1.94, "grad_norm": 1.8449500063553705, "learning_rate": 4.934260869565218e-06, "loss": 0.1778, "step": 49375 }, { "epoch": 1.94, "grad_norm": 1.9070885133107418, "learning_rate": 4.934219063545151e-06, "loss": 0.2199, "step": 49400 }, { "epoch": 1.94, "grad_norm": 1.528503320386569, "learning_rate": 4.934177257525084e-06, "loss": 0.2003, "step": 49425 }, { "epoch": 1.95, "grad_norm": 1.6667344856090824, "learning_rate": 4.934135451505017e-06, "loss": 0.2131, "step": 49450 }, { "epoch": 1.95, "grad_norm": 1.4152939504446955, "learning_rate": 4.9340936454849505e-06, "loss": 0.1976, "step": 49475 }, { "epoch": 1.95, "grad_norm": 1.2226774657729198, "learning_rate": 4.934051839464883e-06, "loss": 0.194, "step": 49500 }, { "epoch": 1.95, "grad_norm": 1.1414769595580594, "learning_rate": 4.934010033444817e-06, "loss": 0.2142, "step": 49525 }, { "epoch": 1.95, "grad_norm": 1.3683179118744992, "learning_rate": 4.9339682274247495e-06, "loss": 0.209, "step": 49550 }, { "epoch": 1.95, "grad_norm": 0.8797811276296436, "learning_rate": 4.933926421404683e-06, "loss": 0.1757, "step": 49575 }, { "epoch": 1.95, "grad_norm": 1.6240088021552774, "learning_rate": 4.933884615384616e-06, "loss": 0.2065, "step": 49600 }, { "epoch": 1.95, "grad_norm": 1.3496244482020072, "learning_rate": 4.933842809364549e-06, "loss": 0.1884, "step": 49625 }, { "epoch": 1.95, "grad_norm": 1.9065579862207658, "learning_rate": 4.933801003344482e-06, "loss": 0.2219, "step": 49650 }, { "epoch": 1.95, "grad_norm": 1.6823942734846893, "learning_rate": 4.9337591973244155e-06, "loss": 0.1922, "step": 49675 }, { "epoch": 1.96, "grad_norm": 1.610332780860586, "learning_rate": 4.933717391304348e-06, "loss": 0.2257, "step": 49700 }, { "epoch": 1.96, "grad_norm": 1.4749993288326266, "learning_rate": 4.933675585284282e-06, "loss": 0.2057, "step": 49725 }, { "epoch": 1.96, "grad_norm": 1.1293208475215297, "learning_rate": 4.9336337792642145e-06, "loss": 0.2238, "step": 49750 }, { "epoch": 1.96, "grad_norm": 0.9704102752018713, "learning_rate": 4.933591973244147e-06, "loss": 0.1962, "step": 49775 }, { "epoch": 1.96, "grad_norm": 1.3546928779201595, "learning_rate": 4.933550167224081e-06, "loss": 0.1999, "step": 49800 }, { "epoch": 1.96, "grad_norm": 1.647795435094045, "learning_rate": 4.933508361204013e-06, "loss": 0.187, "step": 49825 }, { "epoch": 1.96, "grad_norm": 1.1719791408575768, "learning_rate": 4.933466555183947e-06, "loss": 0.1982, "step": 49850 }, { "epoch": 1.96, "grad_norm": 1.410677691655115, "learning_rate": 4.93342474916388e-06, "loss": 0.2037, "step": 49875 }, { "epoch": 1.96, "grad_norm": 1.9028353705603924, "learning_rate": 4.933382943143813e-06, "loss": 0.1955, "step": 49900 }, { "epoch": 1.96, "grad_norm": 1.7600379755892537, "learning_rate": 4.933341137123746e-06, "loss": 0.2016, "step": 49925 }, { "epoch": 1.97, "grad_norm": 1.340426249993996, "learning_rate": 4.9332993311036794e-06, "loss": 0.213, "step": 49950 }, { "epoch": 1.97, "grad_norm": 1.7078496902487155, "learning_rate": 4.933257525083612e-06, "loss": 0.2066, "step": 49975 }, { "epoch": 1.97, "grad_norm": 2.3783688063483632, "learning_rate": 4.933215719063546e-06, "loss": 0.2034, "step": 50000 }, { "epoch": 1.97, "eval_loss": 0.49365234375, "eval_runtime": 11498.4763, "eval_samples_per_second": 0.823, "eval_steps_per_second": 0.051, "eval_wer": 0.11952447597445426, "step": 50000 }, { "epoch": 1.97, "grad_norm": 1.9977781030077004, "learning_rate": 4.933173913043478e-06, "loss": 0.1802, "step": 50025 }, { "epoch": 1.97, "grad_norm": 1.5811010265422427, "learning_rate": 4.933132107023412e-06, "loss": 0.2066, "step": 50050 }, { "epoch": 1.97, "grad_norm": 1.8367235353607692, "learning_rate": 4.933090301003345e-06, "loss": 0.2188, "step": 50075 }, { "epoch": 1.97, "grad_norm": 1.4262908481666672, "learning_rate": 4.933048494983278e-06, "loss": 0.1738, "step": 50100 }, { "epoch": 1.97, "grad_norm": 0.8182130900532735, "learning_rate": 4.933006688963211e-06, "loss": 0.2088, "step": 50125 }, { "epoch": 1.97, "grad_norm": 1.0473337182584366, "learning_rate": 4.932964882943144e-06, "loss": 0.2347, "step": 50150 }, { "epoch": 1.97, "grad_norm": 1.2163618200194586, "learning_rate": 4.932923076923077e-06, "loss": 0.2046, "step": 50175 }, { "epoch": 1.97, "grad_norm": 1.7969452660629082, "learning_rate": 4.932881270903011e-06, "loss": 0.1909, "step": 50200 }, { "epoch": 1.98, "grad_norm": 1.8467324578026039, "learning_rate": 4.932839464882943e-06, "loss": 0.2108, "step": 50225 }, { "epoch": 1.98, "grad_norm": 2.217318536045435, "learning_rate": 4.932797658862877e-06, "loss": 0.2038, "step": 50250 }, { "epoch": 1.98, "grad_norm": 1.4419104306585004, "learning_rate": 4.93275585284281e-06, "loss": 0.2081, "step": 50275 }, { "epoch": 1.98, "grad_norm": 1.6288965207748185, "learning_rate": 4.932714046822743e-06, "loss": 0.2051, "step": 50300 }, { "epoch": 1.98, "grad_norm": 1.3830015306405978, "learning_rate": 4.932672240802676e-06, "loss": 0.1817, "step": 50325 }, { "epoch": 1.98, "grad_norm": 1.85804110304336, "learning_rate": 4.932632107023412e-06, "loss": 0.2122, "step": 50350 }, { "epoch": 1.98, "grad_norm": 1.268213957087889, "learning_rate": 4.932590301003345e-06, "loss": 0.2063, "step": 50375 }, { "epoch": 1.98, "grad_norm": 1.985988233626404, "learning_rate": 4.932548494983278e-06, "loss": 0.2142, "step": 50400 }, { "epoch": 1.98, "grad_norm": 2.0433158965290197, "learning_rate": 4.9325066889632115e-06, "loss": 0.195, "step": 50425 }, { "epoch": 1.98, "grad_norm": 1.619354197211885, "learning_rate": 4.932464882943144e-06, "loss": 0.1964, "step": 50450 }, { "epoch": 1.99, "grad_norm": 1.4976821988213502, "learning_rate": 4.932423076923078e-06, "loss": 0.2149, "step": 50475 }, { "epoch": 1.99, "grad_norm": 2.420182183432332, "learning_rate": 4.93238127090301e-06, "loss": 0.2245, "step": 50500 }, { "epoch": 1.99, "grad_norm": 1.9459403489592213, "learning_rate": 4.932339464882944e-06, "loss": 0.2198, "step": 50525 }, { "epoch": 1.99, "grad_norm": 1.5861503958567087, "learning_rate": 4.932297658862877e-06, "loss": 0.2204, "step": 50550 }, { "epoch": 1.99, "grad_norm": 0.8998163673028098, "learning_rate": 4.93225585284281e-06, "loss": 0.2106, "step": 50575 }, { "epoch": 1.99, "grad_norm": 1.331165993976876, "learning_rate": 4.932214046822743e-06, "loss": 0.1854, "step": 50600 }, { "epoch": 1.99, "grad_norm": 1.733723317933618, "learning_rate": 4.9321722408026764e-06, "loss": 0.1989, "step": 50625 }, { "epoch": 1.99, "grad_norm": 1.78159021767067, "learning_rate": 4.932130434782609e-06, "loss": 0.2156, "step": 50650 }, { "epoch": 1.99, "grad_norm": 1.8765020453068406, "learning_rate": 4.932088628762543e-06, "loss": 0.1994, "step": 50675 }, { "epoch": 1.99, "grad_norm": 1.2239774542089124, "learning_rate": 4.932046822742475e-06, "loss": 0.1955, "step": 50700 }, { "epoch": 2.0, "grad_norm": 1.182462271207217, "learning_rate": 4.932005016722408e-06, "loss": 0.1945, "step": 50725 }, { "epoch": 2.0, "grad_norm": 1.8092880077254656, "learning_rate": 4.931963210702342e-06, "loss": 0.1952, "step": 50750 }, { "epoch": 2.0, "grad_norm": 1.6754850868214068, "learning_rate": 4.931921404682274e-06, "loss": 0.2224, "step": 50775 }, { "epoch": 2.0, "grad_norm": 1.0066490763989873, "learning_rate": 4.931879598662208e-06, "loss": 0.1988, "step": 50800 }, { "epoch": 2.0, "grad_norm": 1.3175579753366817, "learning_rate": 4.9318377926421406e-06, "loss": 0.2053, "step": 50825 }, { "epoch": 2.0, "grad_norm": 1.3893938429753403, "learning_rate": 4.931795986622074e-06, "loss": 0.1887, "step": 50850 }, { "epoch": 2.0, "grad_norm": 1.6195076889793714, "learning_rate": 4.931754180602007e-06, "loss": 0.1549, "step": 50875 }, { "epoch": 2.0, "grad_norm": 2.5831100199621515, "learning_rate": 4.93171237458194e-06, "loss": 0.1559, "step": 50900 }, { "epoch": 2.0, "grad_norm": 2.280031099022124, "learning_rate": 4.931670568561873e-06, "loss": 0.1634, "step": 50925 }, { "epoch": 2.0, "grad_norm": 2.0793384529410677, "learning_rate": 4.931628762541807e-06, "loss": 0.1563, "step": 50950 }, { "epoch": 2.01, "grad_norm": 1.668781784899802, "learning_rate": 4.931586956521739e-06, "loss": 0.1525, "step": 50975 }, { "epoch": 2.01, "grad_norm": 2.532539607395984, "learning_rate": 4.931545150501673e-06, "loss": 0.1557, "step": 51000 }, { "epoch": 2.01, "grad_norm": 1.748971811774227, "learning_rate": 4.9315033444816056e-06, "loss": 0.1626, "step": 51025 }, { "epoch": 2.01, "grad_norm": 1.759658061175033, "learning_rate": 4.931461538461539e-06, "loss": 0.1551, "step": 51050 }, { "epoch": 2.01, "grad_norm": 1.899714433160585, "learning_rate": 4.931419732441472e-06, "loss": 0.1474, "step": 51075 }, { "epoch": 2.01, "grad_norm": 3.3506408589981036, "learning_rate": 4.931377926421405e-06, "loss": 0.1537, "step": 51100 }, { "epoch": 2.01, "grad_norm": 2.445193092719799, "learning_rate": 4.931336120401338e-06, "loss": 0.1553, "step": 51125 }, { "epoch": 2.01, "grad_norm": 2.018152535354776, "learning_rate": 4.931294314381272e-06, "loss": 0.1452, "step": 51150 }, { "epoch": 2.01, "grad_norm": 1.2715640087113764, "learning_rate": 4.931252508361204e-06, "loss": 0.159, "step": 51175 }, { "epoch": 2.01, "grad_norm": 2.6259858096583195, "learning_rate": 4.931210702341138e-06, "loss": 0.1432, "step": 51200 }, { "epoch": 2.02, "grad_norm": 2.0134734462630113, "learning_rate": 4.9311688963210705e-06, "loss": 0.1746, "step": 51225 }, { "epoch": 2.02, "grad_norm": 2.53530897566775, "learning_rate": 4.931127090301004e-06, "loss": 0.1572, "step": 51250 }, { "epoch": 2.02, "grad_norm": 2.319671554271341, "learning_rate": 4.931085284280937e-06, "loss": 0.1702, "step": 51275 }, { "epoch": 2.02, "grad_norm": 2.1657373621605718, "learning_rate": 4.93104347826087e-06, "loss": 0.1778, "step": 51300 }, { "epoch": 2.02, "grad_norm": 1.8207220923214296, "learning_rate": 4.931001672240803e-06, "loss": 0.1752, "step": 51325 }, { "epoch": 2.02, "grad_norm": 1.6051555436818306, "learning_rate": 4.930961538461539e-06, "loss": 0.1463, "step": 51350 }, { "epoch": 2.02, "grad_norm": 3.2631241428045374, "learning_rate": 4.930919732441472e-06, "loss": 0.1562, "step": 51375 }, { "epoch": 2.02, "grad_norm": 2.733310564982773, "learning_rate": 4.930877926421405e-06, "loss": 0.1436, "step": 51400 }, { "epoch": 2.02, "grad_norm": 3.0421003211716813, "learning_rate": 4.930836120401339e-06, "loss": 0.1439, "step": 51425 }, { "epoch": 2.02, "grad_norm": 1.8208998532386054, "learning_rate": 4.930794314381271e-06, "loss": 0.1587, "step": 51450 }, { "epoch": 2.03, "grad_norm": 1.619487202524097, "learning_rate": 4.930752508361205e-06, "loss": 0.1508, "step": 51475 }, { "epoch": 2.03, "grad_norm": 2.4972195849639807, "learning_rate": 4.930710702341138e-06, "loss": 0.1439, "step": 51500 }, { "epoch": 2.03, "grad_norm": 2.0416854490311533, "learning_rate": 4.930668896321071e-06, "loss": 0.1682, "step": 51525 }, { "epoch": 2.03, "grad_norm": 2.565121011610061, "learning_rate": 4.930627090301004e-06, "loss": 0.1571, "step": 51550 }, { "epoch": 2.03, "grad_norm": 1.9390353127611322, "learning_rate": 4.930585284280937e-06, "loss": 0.144, "step": 51575 }, { "epoch": 2.03, "grad_norm": 3.0758813481142617, "learning_rate": 4.93054347826087e-06, "loss": 0.1611, "step": 51600 }, { "epoch": 2.03, "grad_norm": 1.59428448676563, "learning_rate": 4.930501672240804e-06, "loss": 0.1634, "step": 51625 }, { "epoch": 2.03, "grad_norm": 2.038301554771562, "learning_rate": 4.930459866220736e-06, "loss": 0.1527, "step": 51650 }, { "epoch": 2.03, "grad_norm": 2.0376499179826846, "learning_rate": 4.930418060200669e-06, "loss": 0.165, "step": 51675 }, { "epoch": 2.03, "grad_norm": 1.6312057183566488, "learning_rate": 4.9303762541806026e-06, "loss": 0.1456, "step": 51700 }, { "epoch": 2.03, "grad_norm": 1.310992199986981, "learning_rate": 4.930334448160535e-06, "loss": 0.1473, "step": 51725 }, { "epoch": 2.04, "grad_norm": 2.3033985635608203, "learning_rate": 4.930292642140469e-06, "loss": 0.155, "step": 51750 }, { "epoch": 2.04, "grad_norm": 1.832532339873309, "learning_rate": 4.9302508361204015e-06, "loss": 0.1594, "step": 51775 }, { "epoch": 2.04, "grad_norm": 1.8899396483286703, "learning_rate": 4.930209030100335e-06, "loss": 0.1534, "step": 51800 }, { "epoch": 2.04, "grad_norm": 1.5216428673701485, "learning_rate": 4.930167224080268e-06, "loss": 0.1608, "step": 51825 }, { "epoch": 2.04, "grad_norm": 3.8041744241802538, "learning_rate": 4.930125418060201e-06, "loss": 0.1779, "step": 51850 }, { "epoch": 2.04, "grad_norm": 2.479253142386971, "learning_rate": 4.930083612040134e-06, "loss": 0.1623, "step": 51875 }, { "epoch": 2.04, "grad_norm": 2.0835171942963253, "learning_rate": 4.9300418060200675e-06, "loss": 0.1685, "step": 51900 }, { "epoch": 2.04, "grad_norm": 2.4988154900909576, "learning_rate": 4.93e-06, "loss": 0.1615, "step": 51925 }, { "epoch": 2.04, "grad_norm": 1.8221176766564573, "learning_rate": 4.929958193979934e-06, "loss": 0.1502, "step": 51950 }, { "epoch": 2.04, "grad_norm": 1.7415285030938414, "learning_rate": 4.9299163879598665e-06, "loss": 0.1559, "step": 51975 }, { "epoch": 2.05, "grad_norm": 2.704013115530803, "learning_rate": 4.9298745819398e-06, "loss": 0.1685, "step": 52000 }, { "epoch": 2.05, "grad_norm": 3.4942317648547885, "learning_rate": 4.929832775919733e-06, "loss": 0.1544, "step": 52025 }, { "epoch": 2.05, "grad_norm": 1.4905741598961249, "learning_rate": 4.929790969899666e-06, "loss": 0.1526, "step": 52050 }, { "epoch": 2.05, "grad_norm": 2.6196280886358214, "learning_rate": 4.929749163879599e-06, "loss": 0.161, "step": 52075 }, { "epoch": 2.05, "grad_norm": 1.8163085825589482, "learning_rate": 4.9297073578595325e-06, "loss": 0.1769, "step": 52100 }, { "epoch": 2.05, "grad_norm": 1.929047133011546, "learning_rate": 4.929665551839465e-06, "loss": 0.1672, "step": 52125 }, { "epoch": 2.05, "grad_norm": 2.483643296525586, "learning_rate": 4.929623745819399e-06, "loss": 0.1441, "step": 52150 }, { "epoch": 2.05, "grad_norm": 2.844006335610527, "learning_rate": 4.929583612040134e-06, "loss": 0.1718, "step": 52175 }, { "epoch": 2.05, "grad_norm": 2.3894318119446356, "learning_rate": 4.929541806020067e-06, "loss": 0.1538, "step": 52200 }, { "epoch": 2.05, "grad_norm": 2.965395513590732, "learning_rate": 4.9295e-06, "loss": 0.1496, "step": 52225 }, { "epoch": 2.06, "grad_norm": 2.304504000730068, "learning_rate": 4.9294581939799335e-06, "loss": 0.1669, "step": 52250 }, { "epoch": 2.06, "grad_norm": 1.0721485878168964, "learning_rate": 4.929416387959867e-06, "loss": 0.1425, "step": 52275 }, { "epoch": 2.06, "grad_norm": 1.8325103490337078, "learning_rate": 4.9293745819398e-06, "loss": 0.1522, "step": 52300 }, { "epoch": 2.06, "grad_norm": 1.9327528432617735, "learning_rate": 4.929332775919733e-06, "loss": 0.162, "step": 52325 }, { "epoch": 2.06, "grad_norm": 1.3305621320480137, "learning_rate": 4.929290969899666e-06, "loss": 0.1493, "step": 52350 }, { "epoch": 2.06, "grad_norm": 1.944031143750634, "learning_rate": 4.9292491638795996e-06, "loss": 0.1526, "step": 52375 }, { "epoch": 2.06, "grad_norm": 2.3706222006628677, "learning_rate": 4.929207357859532e-06, "loss": 0.1537, "step": 52400 }, { "epoch": 2.06, "grad_norm": 2.3458044853455924, "learning_rate": 4.929165551839466e-06, "loss": 0.1633, "step": 52425 }, { "epoch": 2.06, "grad_norm": 1.74893596241187, "learning_rate": 4.9291237458193985e-06, "loss": 0.1573, "step": 52450 }, { "epoch": 2.06, "grad_norm": 2.422402658813327, "learning_rate": 4.929081939799332e-06, "loss": 0.1769, "step": 52475 }, { "epoch": 2.07, "grad_norm": 2.1581876952824732, "learning_rate": 4.929040133779265e-06, "loss": 0.1667, "step": 52500 }, { "epoch": 2.07, "grad_norm": 1.549994934584253, "learning_rate": 4.928998327759198e-06, "loss": 0.1634, "step": 52525 }, { "epoch": 2.07, "grad_norm": 1.9136228137791405, "learning_rate": 4.928956521739131e-06, "loss": 0.1531, "step": 52550 }, { "epoch": 2.07, "grad_norm": 1.710298426938268, "learning_rate": 4.9289147157190645e-06, "loss": 0.1536, "step": 52575 }, { "epoch": 2.07, "grad_norm": 2.5543413568354856, "learning_rate": 4.928872909698996e-06, "loss": 0.1705, "step": 52600 }, { "epoch": 2.07, "grad_norm": 1.7127212550512338, "learning_rate": 4.92883110367893e-06, "loss": 0.1708, "step": 52625 }, { "epoch": 2.07, "grad_norm": 2.3037309062234965, "learning_rate": 4.928789297658863e-06, "loss": 0.1741, "step": 52650 }, { "epoch": 2.07, "grad_norm": 1.9495570743735675, "learning_rate": 4.928747491638796e-06, "loss": 0.1771, "step": 52675 }, { "epoch": 2.07, "grad_norm": 2.238430814642686, "learning_rate": 4.92870568561873e-06, "loss": 0.1583, "step": 52700 }, { "epoch": 2.07, "grad_norm": 2.320952142850099, "learning_rate": 4.9286638795986624e-06, "loss": 0.1593, "step": 52725 }, { "epoch": 2.08, "grad_norm": 2.1755627909046047, "learning_rate": 4.928622073578596e-06, "loss": 0.1469, "step": 52750 }, { "epoch": 2.08, "grad_norm": 2.4189873372395727, "learning_rate": 4.928580267558529e-06, "loss": 0.1456, "step": 52775 }, { "epoch": 2.08, "grad_norm": 2.2511565610144335, "learning_rate": 4.928538461538462e-06, "loss": 0.1436, "step": 52800 }, { "epoch": 2.08, "grad_norm": 2.1374086292631, "learning_rate": 4.928496655518395e-06, "loss": 0.143, "step": 52825 }, { "epoch": 2.08, "grad_norm": 2.461021396003331, "learning_rate": 4.9284548494983285e-06, "loss": 0.1702, "step": 52850 }, { "epoch": 2.08, "grad_norm": 1.9960814769649182, "learning_rate": 4.928413043478261e-06, "loss": 0.1594, "step": 52875 }, { "epoch": 2.08, "grad_norm": 1.568571889804149, "learning_rate": 4.928371237458195e-06, "loss": 0.1537, "step": 52900 }, { "epoch": 2.08, "grad_norm": 2.8880582494240996, "learning_rate": 4.928329431438127e-06, "loss": 0.1487, "step": 52925 }, { "epoch": 2.08, "grad_norm": 1.4868852996183224, "learning_rate": 4.928287625418061e-06, "loss": 0.1567, "step": 52950 }, { "epoch": 2.08, "grad_norm": 1.6979293077536246, "learning_rate": 4.928245819397994e-06, "loss": 0.1661, "step": 52975 }, { "epoch": 2.09, "grad_norm": 2.0399515236372667, "learning_rate": 4.928204013377927e-06, "loss": 0.1573, "step": 53000 }, { "epoch": 2.09, "grad_norm": 2.2471778177012527, "learning_rate": 4.92816220735786e-06, "loss": 0.15, "step": 53025 }, { "epoch": 2.09, "grad_norm": 1.8607165071886655, "learning_rate": 4.9281204013377934e-06, "loss": 0.158, "step": 53050 }, { "epoch": 2.09, "grad_norm": 2.68403495926998, "learning_rate": 4.928078595317726e-06, "loss": 0.1455, "step": 53075 }, { "epoch": 2.09, "grad_norm": 2.1127210702982535, "learning_rate": 4.92803678929766e-06, "loss": 0.1405, "step": 53100 }, { "epoch": 2.09, "grad_norm": 2.843643320066027, "learning_rate": 4.927994983277592e-06, "loss": 0.1716, "step": 53125 }, { "epoch": 2.09, "grad_norm": 2.8317086013921706, "learning_rate": 4.927953177257526e-06, "loss": 0.157, "step": 53150 }, { "epoch": 2.09, "grad_norm": 2.8083401347718393, "learning_rate": 4.927911371237459e-06, "loss": 0.1612, "step": 53175 }, { "epoch": 2.09, "grad_norm": 1.8263439878473045, "learning_rate": 4.927869565217392e-06, "loss": 0.1766, "step": 53200 }, { "epoch": 2.09, "grad_norm": 2.485098578437788, "learning_rate": 4.927827759197325e-06, "loss": 0.1884, "step": 53225 }, { "epoch": 2.09, "grad_norm": 3.5075459930953996, "learning_rate": 4.927785953177258e-06, "loss": 0.1412, "step": 53250 }, { "epoch": 2.1, "grad_norm": 2.395353082662295, "learning_rate": 4.927744147157191e-06, "loss": 0.1559, "step": 53275 }, { "epoch": 2.1, "grad_norm": 2.1892457117001767, "learning_rate": 4.927702341137125e-06, "loss": 0.1704, "step": 53300 }, { "epoch": 2.1, "grad_norm": 2.220899724188775, "learning_rate": 4.927660535117057e-06, "loss": 0.1717, "step": 53325 }, { "epoch": 2.1, "grad_norm": 1.894092777332042, "learning_rate": 4.92761872909699e-06, "loss": 0.1421, "step": 53350 }, { "epoch": 2.1, "grad_norm": 1.6879891749223022, "learning_rate": 4.927576923076924e-06, "loss": 0.1481, "step": 53375 }, { "epoch": 2.1, "grad_norm": 1.849208892249495, "learning_rate": 4.927535117056856e-06, "loss": 0.1536, "step": 53400 }, { "epoch": 2.1, "grad_norm": 3.234496990961037, "learning_rate": 4.92749331103679e-06, "loss": 0.1578, "step": 53425 }, { "epoch": 2.1, "grad_norm": 1.4492358664142355, "learning_rate": 4.9274515050167225e-06, "loss": 0.1511, "step": 53450 }, { "epoch": 2.1, "grad_norm": 2.685397894450621, "learning_rate": 4.927409698996656e-06, "loss": 0.174, "step": 53475 }, { "epoch": 2.1, "grad_norm": 2.3684425125628716, "learning_rate": 4.927367892976589e-06, "loss": 0.1614, "step": 53500 }, { "epoch": 2.11, "grad_norm": 2.11753748476298, "learning_rate": 4.927326086956522e-06, "loss": 0.1559, "step": 53525 }, { "epoch": 2.11, "grad_norm": 1.9354680446303794, "learning_rate": 4.927284280936455e-06, "loss": 0.1663, "step": 53550 }, { "epoch": 2.11, "grad_norm": 2.227156794826095, "learning_rate": 4.9272424749163886e-06, "loss": 0.1799, "step": 53575 }, { "epoch": 2.11, "grad_norm": 2.620633749751813, "learning_rate": 4.927200668896321e-06, "loss": 0.1648, "step": 53600 }, { "epoch": 2.11, "grad_norm": 1.9582576590163816, "learning_rate": 4.927158862876255e-06, "loss": 0.1611, "step": 53625 }, { "epoch": 2.11, "grad_norm": 1.888954284267945, "learning_rate": 4.9271170568561875e-06, "loss": 0.1493, "step": 53650 }, { "epoch": 2.11, "grad_norm": 1.5525653783661335, "learning_rate": 4.927075250836121e-06, "loss": 0.1539, "step": 53675 }, { "epoch": 2.11, "grad_norm": 2.3535390824131723, "learning_rate": 4.927033444816054e-06, "loss": 0.1635, "step": 53700 }, { "epoch": 2.11, "grad_norm": 2.773296192539899, "learning_rate": 4.926991638795987e-06, "loss": 0.1585, "step": 53725 }, { "epoch": 2.11, "grad_norm": 1.784025818674246, "learning_rate": 4.92694983277592e-06, "loss": 0.1533, "step": 53750 }, { "epoch": 2.12, "grad_norm": 1.671456960230062, "learning_rate": 4.9269080267558536e-06, "loss": 0.1546, "step": 53775 }, { "epoch": 2.12, "grad_norm": 2.523737848859212, "learning_rate": 4.926866220735786e-06, "loss": 0.1707, "step": 53800 }, { "epoch": 2.12, "grad_norm": 1.530100074434391, "learning_rate": 4.92682441471572e-06, "loss": 0.1684, "step": 53825 }, { "epoch": 2.12, "grad_norm": 1.3561056846922916, "learning_rate": 4.9267826086956525e-06, "loss": 0.1888, "step": 53850 }, { "epoch": 2.12, "grad_norm": 2.0777323154601883, "learning_rate": 4.926740802675586e-06, "loss": 0.1646, "step": 53875 }, { "epoch": 2.12, "grad_norm": 1.493446217332173, "learning_rate": 4.926698996655519e-06, "loss": 0.1517, "step": 53900 }, { "epoch": 2.12, "grad_norm": 2.1136296419506113, "learning_rate": 4.926657190635452e-06, "loss": 0.1633, "step": 53925 }, { "epoch": 2.12, "grad_norm": 2.3989158546724845, "learning_rate": 4.926615384615385e-06, "loss": 0.1561, "step": 53950 }, { "epoch": 2.12, "grad_norm": 3.0172378893049374, "learning_rate": 4.9265735785953185e-06, "loss": 0.1728, "step": 53975 }, { "epoch": 2.12, "grad_norm": 1.9134652124787792, "learning_rate": 4.926531772575251e-06, "loss": 0.1601, "step": 54000 }, { "epoch": 2.13, "grad_norm": 2.279368931878358, "learning_rate": 4.926489966555184e-06, "loss": 0.1619, "step": 54025 }, { "epoch": 2.13, "grad_norm": 1.4996559021361437, "learning_rate": 4.9264481605351175e-06, "loss": 0.1535, "step": 54050 }, { "epoch": 2.13, "grad_norm": 1.7199074418315037, "learning_rate": 4.92640635451505e-06, "loss": 0.1526, "step": 54075 }, { "epoch": 2.13, "grad_norm": 2.1648899905797956, "learning_rate": 4.926364548494984e-06, "loss": 0.1653, "step": 54100 }, { "epoch": 2.13, "grad_norm": 1.7286400940104754, "learning_rate": 4.926322742474916e-06, "loss": 0.1557, "step": 54125 }, { "epoch": 2.13, "grad_norm": 1.5889299691023648, "learning_rate": 4.92628093645485e-06, "loss": 0.1623, "step": 54150 }, { "epoch": 2.13, "grad_norm": 2.194237907627962, "learning_rate": 4.926240802675586e-06, "loss": 0.1694, "step": 54175 }, { "epoch": 2.13, "grad_norm": 1.8557622264070135, "learning_rate": 4.926198996655519e-06, "loss": 0.1708, "step": 54200 }, { "epoch": 2.13, "grad_norm": 1.6786879794483356, "learning_rate": 4.926157190635452e-06, "loss": 0.151, "step": 54225 }, { "epoch": 2.13, "grad_norm": 2.9939730018473427, "learning_rate": 4.926115384615385e-06, "loss": 0.1538, "step": 54250 }, { "epoch": 2.14, "grad_norm": 1.9875443858741775, "learning_rate": 4.926073578595318e-06, "loss": 0.176, "step": 54275 }, { "epoch": 2.14, "grad_norm": 1.7973867233787022, "learning_rate": 4.926031772575251e-06, "loss": 0.145, "step": 54300 }, { "epoch": 2.14, "grad_norm": 2.216337336553059, "learning_rate": 4.9259899665551845e-06, "loss": 0.1767, "step": 54325 }, { "epoch": 2.14, "grad_norm": 3.4129098940982208, "learning_rate": 4.925948160535117e-06, "loss": 0.1485, "step": 54350 }, { "epoch": 2.14, "grad_norm": 2.0007981016491767, "learning_rate": 4.925906354515051e-06, "loss": 0.1441, "step": 54375 }, { "epoch": 2.14, "grad_norm": 2.8583478535737, "learning_rate": 4.9258645484949835e-06, "loss": 0.1594, "step": 54400 }, { "epoch": 2.14, "grad_norm": 2.314179244280809, "learning_rate": 4.925822742474917e-06, "loss": 0.155, "step": 54425 }, { "epoch": 2.14, "grad_norm": 2.0132476893206763, "learning_rate": 4.92578093645485e-06, "loss": 0.1656, "step": 54450 }, { "epoch": 2.14, "grad_norm": 1.8635957755225794, "learning_rate": 4.925739130434783e-06, "loss": 0.1602, "step": 54475 }, { "epoch": 2.14, "grad_norm": 2.834088448909386, "learning_rate": 4.925697324414716e-06, "loss": 0.1691, "step": 54500 }, { "epoch": 2.15, "grad_norm": 2.2049287035923615, "learning_rate": 4.9256555183946495e-06, "loss": 0.1581, "step": 54525 }, { "epoch": 2.15, "grad_norm": 3.2612117086630983, "learning_rate": 4.925613712374582e-06, "loss": 0.1581, "step": 54550 }, { "epoch": 2.15, "grad_norm": 1.7241251316995652, "learning_rate": 4.925571906354516e-06, "loss": 0.1623, "step": 54575 }, { "epoch": 2.15, "grad_norm": 2.2161375845886737, "learning_rate": 4.9255301003344484e-06, "loss": 0.173, "step": 54600 }, { "epoch": 2.15, "grad_norm": 1.6279708477585182, "learning_rate": 4.925488294314382e-06, "loss": 0.1622, "step": 54625 }, { "epoch": 2.15, "grad_norm": 1.6368572644697492, "learning_rate": 4.925446488294315e-06, "loss": 0.1665, "step": 54650 }, { "epoch": 2.15, "grad_norm": 1.2374445674653936, "learning_rate": 4.925404682274248e-06, "loss": 0.1645, "step": 54675 }, { "epoch": 2.15, "grad_norm": 2.777041275353947, "learning_rate": 4.925362876254181e-06, "loss": 0.1631, "step": 54700 }, { "epoch": 2.15, "grad_norm": 2.4610825173728323, "learning_rate": 4.9253210702341145e-06, "loss": 0.1594, "step": 54725 }, { "epoch": 2.15, "grad_norm": 2.0784666529738263, "learning_rate": 4.925279264214047e-06, "loss": 0.1563, "step": 54750 }, { "epoch": 2.15, "grad_norm": 1.620773172947401, "learning_rate": 4.925237458193981e-06, "loss": 0.1534, "step": 54775 }, { "epoch": 2.16, "grad_norm": 1.5041303743520913, "learning_rate": 4.925195652173913e-06, "loss": 0.1823, "step": 54800 }, { "epoch": 2.16, "grad_norm": 2.3136541774000023, "learning_rate": 4.925153846153847e-06, "loss": 0.1643, "step": 54825 }, { "epoch": 2.16, "grad_norm": 2.2238847850927437, "learning_rate": 4.92511204013378e-06, "loss": 0.1761, "step": 54850 }, { "epoch": 2.16, "grad_norm": 2.713279366369079, "learning_rate": 4.925070234113713e-06, "loss": 0.1657, "step": 54875 }, { "epoch": 2.16, "grad_norm": 2.6333303154824503, "learning_rate": 4.925028428093646e-06, "loss": 0.1627, "step": 54900 }, { "epoch": 2.16, "grad_norm": 2.863859374260147, "learning_rate": 4.9249866220735795e-06, "loss": 0.1524, "step": 54925 }, { "epoch": 2.16, "grad_norm": 1.7876622608964243, "learning_rate": 4.924944816053512e-06, "loss": 0.1603, "step": 54950 }, { "epoch": 2.16, "grad_norm": 1.7726951565012345, "learning_rate": 4.924903010033445e-06, "loss": 0.1471, "step": 54975 }, { "epoch": 2.16, "grad_norm": 1.823226550047555, "learning_rate": 4.924861204013378e-06, "loss": 0.1562, "step": 55000 }, { "epoch": 2.16, "grad_norm": 1.6079458847263772, "learning_rate": 4.924819397993311e-06, "loss": 0.1687, "step": 55025 }, { "epoch": 2.17, "grad_norm": 2.505583643161287, "learning_rate": 4.924777591973245e-06, "loss": 0.1666, "step": 55050 }, { "epoch": 2.17, "grad_norm": 2.2835391606286155, "learning_rate": 4.924735785953177e-06, "loss": 0.1619, "step": 55075 }, { "epoch": 2.17, "grad_norm": 2.0288694878473654, "learning_rate": 4.924693979933111e-06, "loss": 0.1612, "step": 55100 }, { "epoch": 2.17, "grad_norm": 1.8297162400569662, "learning_rate": 4.924652173913044e-06, "loss": 0.1679, "step": 55125 }, { "epoch": 2.17, "grad_norm": 1.5215046066698381, "learning_rate": 4.924610367892977e-06, "loss": 0.1636, "step": 55150 }, { "epoch": 2.17, "grad_norm": 2.0415805165337577, "learning_rate": 4.924570234113713e-06, "loss": 0.1617, "step": 55175 }, { "epoch": 2.17, "grad_norm": 1.885372598413116, "learning_rate": 4.924528428093646e-06, "loss": 0.1503, "step": 55200 }, { "epoch": 2.17, "grad_norm": 1.693220968582674, "learning_rate": 4.924486622073578e-06, "loss": 0.174, "step": 55225 }, { "epoch": 2.17, "grad_norm": 1.6448831869675682, "learning_rate": 4.924444816053512e-06, "loss": 0.1568, "step": 55250 }, { "epoch": 2.17, "grad_norm": 1.6427824071243027, "learning_rate": 4.924403010033445e-06, "loss": 0.1409, "step": 55275 }, { "epoch": 2.18, "grad_norm": 3.5051468060927804, "learning_rate": 4.924361204013378e-06, "loss": 0.16, "step": 55300 }, { "epoch": 2.18, "grad_norm": 1.3554161547534496, "learning_rate": 4.924319397993311e-06, "loss": 0.1635, "step": 55325 }, { "epoch": 2.18, "grad_norm": 2.2438531362592653, "learning_rate": 4.924277591973244e-06, "loss": 0.1696, "step": 55350 }, { "epoch": 2.18, "grad_norm": 1.993729955191998, "learning_rate": 4.924235785953178e-06, "loss": 0.1414, "step": 55375 }, { "epoch": 2.18, "grad_norm": 2.812931969711888, "learning_rate": 4.924193979933111e-06, "loss": 0.1535, "step": 55400 }, { "epoch": 2.18, "grad_norm": 2.2778307356395966, "learning_rate": 4.924152173913044e-06, "loss": 0.1682, "step": 55425 }, { "epoch": 2.18, "grad_norm": 2.1191913216519205, "learning_rate": 4.924110367892977e-06, "loss": 0.1556, "step": 55450 }, { "epoch": 2.18, "grad_norm": 2.300298131748246, "learning_rate": 4.9240685618729104e-06, "loss": 0.1451, "step": 55475 }, { "epoch": 2.18, "grad_norm": 1.9976266390195618, "learning_rate": 4.924026755852843e-06, "loss": 0.1544, "step": 55500 }, { "epoch": 2.18, "grad_norm": 1.9317993781076177, "learning_rate": 4.923984949832777e-06, "loss": 0.1729, "step": 55525 }, { "epoch": 2.19, "grad_norm": 2.718753249528282, "learning_rate": 4.923943143812709e-06, "loss": 0.1441, "step": 55550 }, { "epoch": 2.19, "grad_norm": 2.2565840107062485, "learning_rate": 4.923901337792643e-06, "loss": 0.1674, "step": 55575 }, { "epoch": 2.19, "grad_norm": 2.383013144810345, "learning_rate": 4.923859531772576e-06, "loss": 0.1605, "step": 55600 }, { "epoch": 2.19, "grad_norm": 3.3092883642771405, "learning_rate": 4.923817725752509e-06, "loss": 0.1669, "step": 55625 }, { "epoch": 2.19, "grad_norm": 2.211414188184337, "learning_rate": 4.923775919732442e-06, "loss": 0.1389, "step": 55650 }, { "epoch": 2.19, "grad_norm": 2.488338988031535, "learning_rate": 4.923734113712375e-06, "loss": 0.168, "step": 55675 }, { "epoch": 2.19, "grad_norm": 2.300045920710584, "learning_rate": 4.923692307692308e-06, "loss": 0.1383, "step": 55700 }, { "epoch": 2.19, "grad_norm": 2.6881951652660336, "learning_rate": 4.923650501672242e-06, "loss": 0.1754, "step": 55725 }, { "epoch": 2.19, "grad_norm": 2.930605332694203, "learning_rate": 4.923608695652174e-06, "loss": 0.1585, "step": 55750 }, { "epoch": 2.19, "grad_norm": 1.7962333691225325, "learning_rate": 4.923566889632108e-06, "loss": 0.1539, "step": 55775 }, { "epoch": 2.2, "grad_norm": 1.6517198727704034, "learning_rate": 4.923525083612041e-06, "loss": 0.1631, "step": 55800 }, { "epoch": 2.2, "grad_norm": 3.2519972863357274, "learning_rate": 4.923483277591974e-06, "loss": 0.1734, "step": 55825 }, { "epoch": 2.2, "grad_norm": 3.322960523681727, "learning_rate": 4.923441471571907e-06, "loss": 0.1626, "step": 55850 }, { "epoch": 2.2, "grad_norm": 2.3228889974625515, "learning_rate": 4.92339966555184e-06, "loss": 0.1629, "step": 55875 }, { "epoch": 2.2, "grad_norm": 2.0280038892158694, "learning_rate": 4.923357859531773e-06, "loss": 0.1828, "step": 55900 }, { "epoch": 2.2, "grad_norm": 1.8535550307672954, "learning_rate": 4.923316053511706e-06, "loss": 0.1631, "step": 55925 }, { "epoch": 2.2, "grad_norm": 3.309157550953824, "learning_rate": 4.923274247491639e-06, "loss": 0.1722, "step": 55950 }, { "epoch": 2.2, "grad_norm": 2.351125286313886, "learning_rate": 4.923232441471572e-06, "loss": 0.1695, "step": 55975 }, { "epoch": 2.2, "grad_norm": 2.297372457039874, "learning_rate": 4.9231906354515056e-06, "loss": 0.1553, "step": 56000 }, { "epoch": 2.2, "grad_norm": 2.0925449099759983, "learning_rate": 4.923148829431438e-06, "loss": 0.1732, "step": 56025 }, { "epoch": 2.21, "grad_norm": 1.6960095376972097, "learning_rate": 4.923107023411372e-06, "loss": 0.1474, "step": 56050 }, { "epoch": 2.21, "grad_norm": 1.9693615031084075, "learning_rate": 4.9230652173913045e-06, "loss": 0.1656, "step": 56075 }, { "epoch": 2.21, "grad_norm": 2.754391153522097, "learning_rate": 4.923023411371238e-06, "loss": 0.1621, "step": 56100 }, { "epoch": 2.21, "grad_norm": 2.6063940292761676, "learning_rate": 4.922981605351171e-06, "loss": 0.142, "step": 56125 }, { "epoch": 2.21, "grad_norm": 2.000603817581123, "learning_rate": 4.922939799331104e-06, "loss": 0.1588, "step": 56150 }, { "epoch": 2.21, "grad_norm": 1.8577242463400019, "learning_rate": 4.922899665551839e-06, "loss": 0.1448, "step": 56175 }, { "epoch": 2.21, "grad_norm": 2.3453900092605062, "learning_rate": 4.922857859531773e-06, "loss": 0.1489, "step": 56200 }, { "epoch": 2.21, "grad_norm": 1.7724438857218991, "learning_rate": 4.9228160535117055e-06, "loss": 0.1619, "step": 56225 }, { "epoch": 2.21, "grad_norm": 1.3524805703480405, "learning_rate": 4.922774247491639e-06, "loss": 0.1687, "step": 56250 }, { "epoch": 2.21, "grad_norm": 3.3408597681628738, "learning_rate": 4.922732441471572e-06, "loss": 0.1643, "step": 56275 }, { "epoch": 2.21, "grad_norm": 1.7170094914128877, "learning_rate": 4.922690635451505e-06, "loss": 0.1667, "step": 56300 }, { "epoch": 2.22, "grad_norm": 1.9939337796903036, "learning_rate": 4.922648829431438e-06, "loss": 0.1436, "step": 56325 }, { "epoch": 2.22, "grad_norm": 3.413924259574225, "learning_rate": 4.9226070234113716e-06, "loss": 0.1715, "step": 56350 }, { "epoch": 2.22, "grad_norm": 2.322985702949605, "learning_rate": 4.922565217391304e-06, "loss": 0.1658, "step": 56375 }, { "epoch": 2.22, "grad_norm": 2.370524435039194, "learning_rate": 4.922523411371238e-06, "loss": 0.1577, "step": 56400 }, { "epoch": 2.22, "grad_norm": 1.983387072213947, "learning_rate": 4.9224816053511705e-06, "loss": 0.1768, "step": 56425 }, { "epoch": 2.22, "grad_norm": 3.188374061799889, "learning_rate": 4.922439799331104e-06, "loss": 0.1555, "step": 56450 }, { "epoch": 2.22, "grad_norm": 1.5451448494834452, "learning_rate": 4.922397993311038e-06, "loss": 0.1627, "step": 56475 }, { "epoch": 2.22, "grad_norm": 1.444626924036314, "learning_rate": 4.92235618729097e-06, "loss": 0.1518, "step": 56500 }, { "epoch": 2.22, "grad_norm": 2.0333573434866046, "learning_rate": 4.922314381270904e-06, "loss": 0.1682, "step": 56525 }, { "epoch": 2.22, "grad_norm": 2.404693100907314, "learning_rate": 4.9222725752508365e-06, "loss": 0.1605, "step": 56550 }, { "epoch": 2.23, "grad_norm": 1.8865735811248825, "learning_rate": 4.92223076923077e-06, "loss": 0.17, "step": 56575 }, { "epoch": 2.23, "grad_norm": 1.9171365472921278, "learning_rate": 4.922188963210703e-06, "loss": 0.1693, "step": 56600 }, { "epoch": 2.23, "grad_norm": 2.4763194923869865, "learning_rate": 4.922147157190636e-06, "loss": 0.1562, "step": 56625 }, { "epoch": 2.23, "grad_norm": 2.764068022978239, "learning_rate": 4.922105351170569e-06, "loss": 0.1527, "step": 56650 }, { "epoch": 2.23, "grad_norm": 2.1369551320651, "learning_rate": 4.9220635451505026e-06, "loss": 0.1488, "step": 56675 }, { "epoch": 2.23, "grad_norm": 2.325422901668469, "learning_rate": 4.922021739130435e-06, "loss": 0.1719, "step": 56700 }, { "epoch": 2.23, "grad_norm": 2.1041649166600105, "learning_rate": 4.921979933110369e-06, "loss": 0.1768, "step": 56725 }, { "epoch": 2.23, "grad_norm": 1.7655589341104052, "learning_rate": 4.9219381270903015e-06, "loss": 0.1777, "step": 56750 }, { "epoch": 2.23, "grad_norm": 1.8380477181947645, "learning_rate": 4.921896321070235e-06, "loss": 0.1644, "step": 56775 }, { "epoch": 2.23, "grad_norm": 2.3126435559314795, "learning_rate": 4.921854515050168e-06, "loss": 0.1731, "step": 56800 }, { "epoch": 2.24, "grad_norm": 2.56461416141031, "learning_rate": 4.921812709030101e-06, "loss": 0.159, "step": 56825 }, { "epoch": 2.24, "grad_norm": 2.1602569083850414, "learning_rate": 4.921770903010033e-06, "loss": 0.1584, "step": 56850 }, { "epoch": 2.24, "grad_norm": 1.8962818131934016, "learning_rate": 4.921729096989967e-06, "loss": 0.1576, "step": 56875 }, { "epoch": 2.24, "grad_norm": 1.8524281706098542, "learning_rate": 4.9216872909699e-06, "loss": 0.1561, "step": 56900 }, { "epoch": 2.24, "grad_norm": 1.9143396372190553, "learning_rate": 4.921645484949833e-06, "loss": 0.1692, "step": 56925 }, { "epoch": 2.24, "grad_norm": 2.822879321951725, "learning_rate": 4.9216036789297665e-06, "loss": 0.1613, "step": 56950 }, { "epoch": 2.24, "grad_norm": 1.894258326831287, "learning_rate": 4.921561872909699e-06, "loss": 0.1541, "step": 56975 }, { "epoch": 2.24, "grad_norm": 1.7074729590893851, "learning_rate": 4.921520066889633e-06, "loss": 0.1555, "step": 57000 }, { "epoch": 2.24, "grad_norm": 1.612601267245271, "learning_rate": 4.9214782608695654e-06, "loss": 0.145, "step": 57025 }, { "epoch": 2.24, "grad_norm": 2.3502778459294635, "learning_rate": 4.921436454849499e-06, "loss": 0.1602, "step": 57050 }, { "epoch": 2.25, "grad_norm": 1.6450546458329638, "learning_rate": 4.921394648829432e-06, "loss": 0.1717, "step": 57075 }, { "epoch": 2.25, "grad_norm": 1.6578146368646642, "learning_rate": 4.921352842809365e-06, "loss": 0.156, "step": 57100 }, { "epoch": 2.25, "grad_norm": 2.2414245528366807, "learning_rate": 4.921311036789298e-06, "loss": 0.1353, "step": 57125 }, { "epoch": 2.25, "grad_norm": 2.8998348774283498, "learning_rate": 4.9212692307692315e-06, "loss": 0.171, "step": 57150 }, { "epoch": 2.25, "grad_norm": 2.9985448953876657, "learning_rate": 4.9212290969899665e-06, "loss": 0.1467, "step": 57175 }, { "epoch": 2.25, "grad_norm": 2.399374920196816, "learning_rate": 4.9211872909699e-06, "loss": 0.1614, "step": 57200 }, { "epoch": 2.25, "grad_norm": 2.430911112893449, "learning_rate": 4.921145484949833e-06, "loss": 0.1678, "step": 57225 }, { "epoch": 2.25, "grad_norm": 2.119242897772184, "learning_rate": 4.921103678929766e-06, "loss": 0.1608, "step": 57250 }, { "epoch": 2.25, "grad_norm": 2.4857313645191823, "learning_rate": 4.921061872909699e-06, "loss": 0.1827, "step": 57275 }, { "epoch": 2.25, "grad_norm": 2.904099021212478, "learning_rate": 4.9210200668896325e-06, "loss": 0.182, "step": 57300 }, { "epoch": 2.26, "grad_norm": 1.548156179946889, "learning_rate": 4.920978260869565e-06, "loss": 0.1628, "step": 57325 }, { "epoch": 2.26, "grad_norm": 2.3494390107702494, "learning_rate": 4.920936454849499e-06, "loss": 0.1479, "step": 57350 }, { "epoch": 2.26, "grad_norm": 1.8864858832812406, "learning_rate": 4.9208946488294314e-06, "loss": 0.159, "step": 57375 }, { "epoch": 2.26, "grad_norm": 1.3675349817558708, "learning_rate": 4.920852842809365e-06, "loss": 0.1698, "step": 57400 }, { "epoch": 2.26, "grad_norm": 1.5225907794607534, "learning_rate": 4.920811036789298e-06, "loss": 0.1651, "step": 57425 }, { "epoch": 2.26, "grad_norm": 2.1154683368095637, "learning_rate": 4.920769230769231e-06, "loss": 0.1577, "step": 57450 }, { "epoch": 2.26, "grad_norm": 1.8981664952230868, "learning_rate": 4.920727424749164e-06, "loss": 0.1719, "step": 57475 }, { "epoch": 2.26, "grad_norm": 2.53863923398963, "learning_rate": 4.9206856187290975e-06, "loss": 0.1644, "step": 57500 }, { "epoch": 2.26, "grad_norm": 2.105776922935975, "learning_rate": 4.92064381270903e-06, "loss": 0.1545, "step": 57525 }, { "epoch": 2.26, "grad_norm": 2.0750252396438587, "learning_rate": 4.920602006688964e-06, "loss": 0.155, "step": 57550 }, { "epoch": 2.27, "grad_norm": 3.1660226968910576, "learning_rate": 4.920560200668897e-06, "loss": 0.1731, "step": 57575 }, { "epoch": 2.27, "grad_norm": 3.025064147146057, "learning_rate": 4.92051839464883e-06, "loss": 0.1658, "step": 57600 }, { "epoch": 2.27, "grad_norm": 2.4539822980767205, "learning_rate": 4.9204765886287635e-06, "loss": 0.1496, "step": 57625 }, { "epoch": 2.27, "grad_norm": 1.8245524096248626, "learning_rate": 4.920434782608696e-06, "loss": 0.1479, "step": 57650 }, { "epoch": 2.27, "grad_norm": 2.1986903832701565, "learning_rate": 4.92039297658863e-06, "loss": 0.1757, "step": 57675 }, { "epoch": 2.27, "grad_norm": 1.737008195248559, "learning_rate": 4.9203511705685624e-06, "loss": 0.1574, "step": 57700 }, { "epoch": 2.27, "grad_norm": 2.7053823545914026, "learning_rate": 4.920309364548496e-06, "loss": 0.169, "step": 57725 }, { "epoch": 2.27, "grad_norm": 2.4113450770376206, "learning_rate": 4.920267558528429e-06, "loss": 0.1667, "step": 57750 }, { "epoch": 2.27, "grad_norm": 1.970065828181729, "learning_rate": 4.920225752508362e-06, "loss": 0.1661, "step": 57775 }, { "epoch": 2.27, "grad_norm": 3.0890846295834673, "learning_rate": 4.920183946488294e-06, "loss": 0.1638, "step": 57800 }, { "epoch": 2.27, "grad_norm": 2.076681262223452, "learning_rate": 4.920142140468228e-06, "loss": 0.1864, "step": 57825 }, { "epoch": 2.28, "grad_norm": 2.343128244131371, "learning_rate": 4.92010033444816e-06, "loss": 0.1687, "step": 57850 }, { "epoch": 2.28, "grad_norm": 2.9276428639103855, "learning_rate": 4.920058528428094e-06, "loss": 0.1614, "step": 57875 }, { "epoch": 2.28, "grad_norm": 1.761415206373535, "learning_rate": 4.9200167224080266e-06, "loss": 0.1636, "step": 57900 }, { "epoch": 2.28, "grad_norm": 1.2563054496648425, "learning_rate": 4.91997491638796e-06, "loss": 0.1586, "step": 57925 }, { "epoch": 2.28, "grad_norm": 2.639280854740686, "learning_rate": 4.919933110367893e-06, "loss": 0.1663, "step": 57950 }, { "epoch": 2.28, "grad_norm": 1.4109976776005013, "learning_rate": 4.919891304347826e-06, "loss": 0.1607, "step": 57975 }, { "epoch": 2.28, "grad_norm": 2.6597424343430185, "learning_rate": 4.91984949832776e-06, "loss": 0.142, "step": 58000 }, { "epoch": 2.28, "grad_norm": 2.734114761003077, "learning_rate": 4.919807692307693e-06, "loss": 0.1666, "step": 58025 }, { "epoch": 2.28, "grad_norm": 1.396277658035052, "learning_rate": 4.919765886287626e-06, "loss": 0.1505, "step": 58050 }, { "epoch": 2.28, "grad_norm": 2.5980057974065858, "learning_rate": 4.919724080267559e-06, "loss": 0.1425, "step": 58075 }, { "epoch": 2.29, "grad_norm": 1.260543291214527, "learning_rate": 4.919682274247492e-06, "loss": 0.1507, "step": 58100 }, { "epoch": 2.29, "grad_norm": 1.759475488284363, "learning_rate": 4.919640468227425e-06, "loss": 0.1804, "step": 58125 }, { "epoch": 2.29, "grad_norm": 2.8038300402487817, "learning_rate": 4.919598662207359e-06, "loss": 0.1644, "step": 58150 }, { "epoch": 2.29, "grad_norm": 2.260869107549195, "learning_rate": 4.919558528428094e-06, "loss": 0.1508, "step": 58175 }, { "epoch": 2.29, "grad_norm": 2.414584150357879, "learning_rate": 4.919516722408027e-06, "loss": 0.1709, "step": 58200 }, { "epoch": 2.29, "grad_norm": 1.5049432654576578, "learning_rate": 4.91947491638796e-06, "loss": 0.1667, "step": 58225 }, { "epoch": 2.29, "grad_norm": 2.2825260278718655, "learning_rate": 4.919433110367893e-06, "loss": 0.1566, "step": 58250 }, { "epoch": 2.29, "grad_norm": 1.6614178100576562, "learning_rate": 4.919391304347826e-06, "loss": 0.1568, "step": 58275 }, { "epoch": 2.29, "grad_norm": 1.7269557876148456, "learning_rate": 4.91934949832776e-06, "loss": 0.1552, "step": 58300 }, { "epoch": 2.29, "grad_norm": 2.1426447315621164, "learning_rate": 4.919307692307692e-06, "loss": 0.1544, "step": 58325 }, { "epoch": 2.3, "grad_norm": 2.847512216664601, "learning_rate": 4.919265886287626e-06, "loss": 0.1763, "step": 58350 }, { "epoch": 2.3, "grad_norm": 2.177179572888644, "learning_rate": 4.919224080267559e-06, "loss": 0.1649, "step": 58375 }, { "epoch": 2.3, "grad_norm": 2.3301719269253813, "learning_rate": 4.919182274247492e-06, "loss": 0.1629, "step": 58400 }, { "epoch": 2.3, "grad_norm": 2.9371628068840936, "learning_rate": 4.919140468227425e-06, "loss": 0.1582, "step": 58425 }, { "epoch": 2.3, "grad_norm": 2.1562146801351836, "learning_rate": 4.919098662207358e-06, "loss": 0.1905, "step": 58450 }, { "epoch": 2.3, "grad_norm": 2.191229103225186, "learning_rate": 4.919056856187291e-06, "loss": 0.1694, "step": 58475 }, { "epoch": 2.3, "grad_norm": 1.6064059646004025, "learning_rate": 4.919015050167225e-06, "loss": 0.1488, "step": 58500 }, { "epoch": 2.3, "grad_norm": 4.323530192845432, "learning_rate": 4.918973244147157e-06, "loss": 0.1528, "step": 58525 }, { "epoch": 2.3, "grad_norm": 2.095986879359018, "learning_rate": 4.918931438127091e-06, "loss": 0.1711, "step": 58550 }, { "epoch": 2.3, "grad_norm": 2.10375244642294, "learning_rate": 4.918889632107024e-06, "loss": 0.1797, "step": 58575 }, { "epoch": 2.31, "grad_norm": 2.4047401055803066, "learning_rate": 4.918847826086957e-06, "loss": 0.1688, "step": 58600 }, { "epoch": 2.31, "grad_norm": 2.310141666785082, "learning_rate": 4.91880602006689e-06, "loss": 0.1661, "step": 58625 }, { "epoch": 2.31, "grad_norm": 1.86452149655283, "learning_rate": 4.918764214046823e-06, "loss": 0.1591, "step": 58650 }, { "epoch": 2.31, "grad_norm": 2.187797758585452, "learning_rate": 4.918722408026757e-06, "loss": 0.1584, "step": 58675 }, { "epoch": 2.31, "grad_norm": 1.8069341965191519, "learning_rate": 4.91868060200669e-06, "loss": 0.1555, "step": 58700 }, { "epoch": 2.31, "grad_norm": 2.18250105172939, "learning_rate": 4.918638795986623e-06, "loss": 0.1623, "step": 58725 }, { "epoch": 2.31, "grad_norm": 2.6732097415682214, "learning_rate": 4.918596989966555e-06, "loss": 0.1664, "step": 58750 }, { "epoch": 2.31, "grad_norm": 1.2977159672754395, "learning_rate": 4.9185551839464886e-06, "loss": 0.166, "step": 58775 }, { "epoch": 2.31, "grad_norm": 2.007392545610804, "learning_rate": 4.918513377926421e-06, "loss": 0.1634, "step": 58800 }, { "epoch": 2.31, "grad_norm": 2.7077236084984597, "learning_rate": 4.918471571906355e-06, "loss": 0.1491, "step": 58825 }, { "epoch": 2.32, "grad_norm": 2.424611894537868, "learning_rate": 4.9184297658862875e-06, "loss": 0.1587, "step": 58850 }, { "epoch": 2.32, "grad_norm": 1.3860933115368321, "learning_rate": 4.918387959866221e-06, "loss": 0.1496, "step": 58875 }, { "epoch": 2.32, "grad_norm": 2.6511674403642846, "learning_rate": 4.918346153846154e-06, "loss": 0.1678, "step": 58900 }, { "epoch": 2.32, "grad_norm": 2.1685696271223653, "learning_rate": 4.918304347826087e-06, "loss": 0.1411, "step": 58925 }, { "epoch": 2.32, "grad_norm": 2.0485582790269428, "learning_rate": 4.91826254180602e-06, "loss": 0.1757, "step": 58950 }, { "epoch": 2.32, "grad_norm": 2.6032184254228543, "learning_rate": 4.9182207357859535e-06, "loss": 0.1625, "step": 58975 }, { "epoch": 2.32, "grad_norm": 2.027124650130354, "learning_rate": 4.918178929765886e-06, "loss": 0.1421, "step": 59000 }, { "epoch": 2.32, "grad_norm": 2.8814506846542645, "learning_rate": 4.91813712374582e-06, "loss": 0.1628, "step": 59025 }, { "epoch": 2.32, "grad_norm": 1.1579894725073154, "learning_rate": 4.9180953177257525e-06, "loss": 0.1654, "step": 59050 }, { "epoch": 2.32, "grad_norm": 2.111540003358208, "learning_rate": 4.918053511705686e-06, "loss": 0.1695, "step": 59075 }, { "epoch": 2.33, "grad_norm": 2.7137729677726354, "learning_rate": 4.9180117056856196e-06, "loss": 0.1486, "step": 59100 }, { "epoch": 2.33, "grad_norm": 1.7812965649458703, "learning_rate": 4.917969899665552e-06, "loss": 0.1638, "step": 59125 }, { "epoch": 2.33, "grad_norm": 2.011544514420453, "learning_rate": 4.917928093645486e-06, "loss": 0.1733, "step": 59150 }, { "epoch": 2.33, "grad_norm": 2.390700805022534, "learning_rate": 4.917887959866221e-06, "loss": 0.1751, "step": 59175 }, { "epoch": 2.33, "grad_norm": 2.969735384499303, "learning_rate": 4.917846153846154e-06, "loss": 0.1729, "step": 59200 }, { "epoch": 2.33, "grad_norm": 1.913785187777396, "learning_rate": 4.917804347826087e-06, "loss": 0.153, "step": 59225 }, { "epoch": 2.33, "grad_norm": 1.8021064858925475, "learning_rate": 4.917762541806021e-06, "loss": 0.1502, "step": 59250 }, { "epoch": 2.33, "grad_norm": 2.833978772258698, "learning_rate": 4.917720735785953e-06, "loss": 0.1724, "step": 59275 }, { "epoch": 2.33, "grad_norm": 2.2849194247990177, "learning_rate": 4.917678929765887e-06, "loss": 0.1698, "step": 59300 }, { "epoch": 2.33, "grad_norm": 1.6327536122141721, "learning_rate": 4.9176371237458195e-06, "loss": 0.1479, "step": 59325 }, { "epoch": 2.33, "grad_norm": 1.8262023075338034, "learning_rate": 4.917595317725753e-06, "loss": 0.1642, "step": 59350 }, { "epoch": 2.34, "grad_norm": 1.9784854693898795, "learning_rate": 4.917553511705686e-06, "loss": 0.158, "step": 59375 }, { "epoch": 2.34, "grad_norm": 2.359260864971092, "learning_rate": 4.917511705685619e-06, "loss": 0.1571, "step": 59400 }, { "epoch": 2.34, "grad_norm": 2.2374038047494427, "learning_rate": 4.917469899665552e-06, "loss": 0.1471, "step": 59425 }, { "epoch": 2.34, "grad_norm": 2.0946720369210645, "learning_rate": 4.9174280936454856e-06, "loss": 0.1658, "step": 59450 }, { "epoch": 2.34, "grad_norm": 2.395192692613216, "learning_rate": 4.917386287625418e-06, "loss": 0.1662, "step": 59475 }, { "epoch": 2.34, "grad_norm": 2.107283464209178, "learning_rate": 4.917344481605352e-06, "loss": 0.1646, "step": 59500 }, { "epoch": 2.34, "grad_norm": 3.4601379596453037, "learning_rate": 4.9173026755852845e-06, "loss": 0.1577, "step": 59525 }, { "epoch": 2.34, "grad_norm": 2.7885302397106813, "learning_rate": 4.917260869565218e-06, "loss": 0.1491, "step": 59550 }, { "epoch": 2.34, "grad_norm": 1.9441052075639202, "learning_rate": 4.917219063545151e-06, "loss": 0.1518, "step": 59575 }, { "epoch": 2.34, "grad_norm": 2.435857624262805, "learning_rate": 4.917177257525084e-06, "loss": 0.1631, "step": 59600 }, { "epoch": 2.35, "grad_norm": 2.4963602725155, "learning_rate": 4.917135451505017e-06, "loss": 0.1588, "step": 59625 }, { "epoch": 2.35, "grad_norm": 2.324784100257762, "learning_rate": 4.9170936454849505e-06, "loss": 0.1539, "step": 59650 }, { "epoch": 2.35, "grad_norm": 2.0187377882547612, "learning_rate": 4.917051839464883e-06, "loss": 0.1695, "step": 59675 }, { "epoch": 2.35, "grad_norm": 2.32940588948083, "learning_rate": 4.917010033444816e-06, "loss": 0.1516, "step": 59700 }, { "epoch": 2.35, "grad_norm": 1.836519988909741, "learning_rate": 4.9169682274247495e-06, "loss": 0.1747, "step": 59725 }, { "epoch": 2.35, "grad_norm": 2.196327753251213, "learning_rate": 4.916926421404682e-06, "loss": 0.1765, "step": 59750 }, { "epoch": 2.35, "grad_norm": 2.2094447994990345, "learning_rate": 4.916884615384616e-06, "loss": 0.1525, "step": 59775 }, { "epoch": 2.35, "grad_norm": 2.5435910280268885, "learning_rate": 4.916842809364548e-06, "loss": 0.1588, "step": 59800 }, { "epoch": 2.35, "grad_norm": 2.1170442526101163, "learning_rate": 4.916801003344482e-06, "loss": 0.1652, "step": 59825 }, { "epoch": 2.35, "grad_norm": 4.0496458307816825, "learning_rate": 4.916759197324415e-06, "loss": 0.1676, "step": 59850 }, { "epoch": 2.36, "grad_norm": 2.5764839005683617, "learning_rate": 4.916717391304348e-06, "loss": 0.1524, "step": 59875 }, { "epoch": 2.36, "grad_norm": 1.80735502088511, "learning_rate": 4.916675585284281e-06, "loss": 0.1646, "step": 59900 }, { "epoch": 2.36, "grad_norm": 3.4931943985698877, "learning_rate": 4.9166337792642145e-06, "loss": 0.1688, "step": 59925 }, { "epoch": 2.36, "grad_norm": 2.620793187205274, "learning_rate": 4.916591973244147e-06, "loss": 0.1645, "step": 59950 }, { "epoch": 2.36, "grad_norm": 2.2352409464681644, "learning_rate": 4.916550167224081e-06, "loss": 0.1485, "step": 59975 }, { "epoch": 2.36, "grad_norm": 1.8848876549630622, "learning_rate": 4.916508361204013e-06, "loss": 0.1717, "step": 60000 }, { "epoch": 2.36, "eval_loss": 0.5126953125, "eval_runtime": 11570.5057, "eval_samples_per_second": 0.818, "eval_steps_per_second": 0.051, "eval_wer": 0.11778710808680405, "step": 60000 }, { "epoch": 2.36, "grad_norm": 2.5457840080713066, "learning_rate": 4.916466555183947e-06, "loss": 0.163, "step": 60025 }, { "epoch": 2.36, "grad_norm": 1.7137502472167718, "learning_rate": 4.91642474916388e-06, "loss": 0.153, "step": 60050 }, { "epoch": 2.36, "grad_norm": 2.0294624199856943, "learning_rate": 4.916382943143813e-06, "loss": 0.1589, "step": 60075 }, { "epoch": 2.36, "grad_norm": 2.3906210473393097, "learning_rate": 4.916341137123746e-06, "loss": 0.1652, "step": 60100 }, { "epoch": 2.37, "grad_norm": 1.2050739597355082, "learning_rate": 4.9162993311036794e-06, "loss": 0.1638, "step": 60125 }, { "epoch": 2.37, "grad_norm": 3.2900598636843386, "learning_rate": 4.916257525083612e-06, "loss": 0.1728, "step": 60150 }, { "epoch": 2.37, "grad_norm": 2.1524349935755915, "learning_rate": 4.916217391304348e-06, "loss": 0.1489, "step": 60175 }, { "epoch": 2.37, "grad_norm": 2.954050934593037, "learning_rate": 4.9161755852842815e-06, "loss": 0.1858, "step": 60200 }, { "epoch": 2.37, "grad_norm": 1.9504239110160009, "learning_rate": 4.916133779264214e-06, "loss": 0.1584, "step": 60225 }, { "epoch": 2.37, "grad_norm": 5.834701243563905, "learning_rate": 4.916091973244148e-06, "loss": 0.1653, "step": 60250 }, { "epoch": 2.37, "grad_norm": 2.504682679956055, "learning_rate": 4.9160501672240805e-06, "loss": 0.1728, "step": 60275 }, { "epoch": 2.37, "grad_norm": 2.6111928108040954, "learning_rate": 4.916008361204014e-06, "loss": 0.1695, "step": 60300 }, { "epoch": 2.37, "grad_norm": 2.7115359266814565, "learning_rate": 4.915966555183947e-06, "loss": 0.1659, "step": 60325 }, { "epoch": 2.37, "grad_norm": 2.7986136435971476, "learning_rate": 4.91592474916388e-06, "loss": 0.1561, "step": 60350 }, { "epoch": 2.38, "grad_norm": 2.2374505980103256, "learning_rate": 4.915882943143813e-06, "loss": 0.1811, "step": 60375 }, { "epoch": 2.38, "grad_norm": 2.677702691876237, "learning_rate": 4.9158411371237465e-06, "loss": 0.1578, "step": 60400 }, { "epoch": 2.38, "grad_norm": 1.710984414065094, "learning_rate": 4.915799331103679e-06, "loss": 0.1766, "step": 60425 }, { "epoch": 2.38, "grad_norm": 1.8611113380470368, "learning_rate": 4.915757525083613e-06, "loss": 0.1788, "step": 60450 }, { "epoch": 2.38, "grad_norm": 2.6445493558289472, "learning_rate": 4.9157157190635454e-06, "loss": 0.1777, "step": 60475 }, { "epoch": 2.38, "grad_norm": 2.476843097247296, "learning_rate": 4.915673913043479e-06, "loss": 0.1554, "step": 60500 }, { "epoch": 2.38, "grad_norm": 2.009772580934335, "learning_rate": 4.915632107023412e-06, "loss": 0.176, "step": 60525 }, { "epoch": 2.38, "grad_norm": 1.799057734334839, "learning_rate": 4.915590301003345e-06, "loss": 0.1687, "step": 60550 }, { "epoch": 2.38, "grad_norm": 2.574514219545785, "learning_rate": 4.915548494983278e-06, "loss": 0.1772, "step": 60575 }, { "epoch": 2.38, "grad_norm": 1.7987424850971203, "learning_rate": 4.9155066889632115e-06, "loss": 0.1662, "step": 60600 }, { "epoch": 2.39, "grad_norm": 2.08168165567613, "learning_rate": 4.915464882943144e-06, "loss": 0.1705, "step": 60625 }, { "epoch": 2.39, "grad_norm": 2.306741935160784, "learning_rate": 4.915423076923077e-06, "loss": 0.1644, "step": 60650 }, { "epoch": 2.39, "grad_norm": 1.9982050594746743, "learning_rate": 4.91538127090301e-06, "loss": 0.1637, "step": 60675 }, { "epoch": 2.39, "grad_norm": 2.3306033444527405, "learning_rate": 4.915339464882943e-06, "loss": 0.166, "step": 60700 }, { "epoch": 2.39, "grad_norm": 2.239829674488769, "learning_rate": 4.915297658862877e-06, "loss": 0.1721, "step": 60725 }, { "epoch": 2.39, "grad_norm": 2.0716748422338145, "learning_rate": 4.915255852842809e-06, "loss": 0.1665, "step": 60750 }, { "epoch": 2.39, "grad_norm": 2.3627489051626163, "learning_rate": 4.915214046822743e-06, "loss": 0.1656, "step": 60775 }, { "epoch": 2.39, "grad_norm": 2.7947561118872883, "learning_rate": 4.915172240802676e-06, "loss": 0.1821, "step": 60800 }, { "epoch": 2.39, "grad_norm": 2.481319736763784, "learning_rate": 4.915130434782609e-06, "loss": 0.1608, "step": 60825 }, { "epoch": 2.39, "grad_norm": 1.5617043799686217, "learning_rate": 4.915088628762542e-06, "loss": 0.1556, "step": 60850 }, { "epoch": 2.39, "grad_norm": 2.552707926658039, "learning_rate": 4.915046822742475e-06, "loss": 0.1748, "step": 60875 }, { "epoch": 2.4, "grad_norm": 2.4439378238092067, "learning_rate": 4.915005016722408e-06, "loss": 0.1682, "step": 60900 }, { "epoch": 2.4, "grad_norm": 1.998043095324892, "learning_rate": 4.914963210702342e-06, "loss": 0.1976, "step": 60925 }, { "epoch": 2.4, "grad_norm": 2.2108208593309526, "learning_rate": 4.914921404682274e-06, "loss": 0.1552, "step": 60950 }, { "epoch": 2.4, "grad_norm": 1.9371077350313313, "learning_rate": 4.914879598662208e-06, "loss": 0.1467, "step": 60975 }, { "epoch": 2.4, "grad_norm": 1.7039801639447034, "learning_rate": 4.9148377926421406e-06, "loss": 0.1518, "step": 61000 }, { "epoch": 2.4, "grad_norm": 1.915961676427643, "learning_rate": 4.914795986622074e-06, "loss": 0.1638, "step": 61025 }, { "epoch": 2.4, "grad_norm": 1.9113673117236556, "learning_rate": 4.914754180602007e-06, "loss": 0.1676, "step": 61050 }, { "epoch": 2.4, "grad_norm": 2.4662369168057308, "learning_rate": 4.91471237458194e-06, "loss": 0.1757, "step": 61075 }, { "epoch": 2.4, "grad_norm": 2.0060579273255077, "learning_rate": 4.914670568561873e-06, "loss": 0.1525, "step": 61100 }, { "epoch": 2.4, "grad_norm": 1.6452207663404552, "learning_rate": 4.914628762541807e-06, "loss": 0.186, "step": 61125 }, { "epoch": 2.41, "grad_norm": 2.043111214852014, "learning_rate": 4.914586956521739e-06, "loss": 0.1721, "step": 61150 }, { "epoch": 2.41, "grad_norm": 2.2998189089040224, "learning_rate": 4.914546822742475e-06, "loss": 0.173, "step": 61175 }, { "epoch": 2.41, "grad_norm": 2.229036607651321, "learning_rate": 4.914505016722409e-06, "loss": 0.1677, "step": 61200 }, { "epoch": 2.41, "grad_norm": 2.2040697290855484, "learning_rate": 4.914463210702341e-06, "loss": 0.1565, "step": 61225 }, { "epoch": 2.41, "grad_norm": 1.975337279121286, "learning_rate": 4.914421404682275e-06, "loss": 0.1567, "step": 61250 }, { "epoch": 2.41, "grad_norm": 2.1901711072719783, "learning_rate": 4.914379598662208e-06, "loss": 0.1615, "step": 61275 }, { "epoch": 2.41, "grad_norm": 2.410651010100104, "learning_rate": 4.914337792642141e-06, "loss": 0.1596, "step": 61300 }, { "epoch": 2.41, "grad_norm": 2.473836763687653, "learning_rate": 4.914295986622074e-06, "loss": 0.173, "step": 61325 }, { "epoch": 2.41, "grad_norm": 2.0361562862320617, "learning_rate": 4.914254180602007e-06, "loss": 0.1705, "step": 61350 }, { "epoch": 2.41, "grad_norm": 2.6879914919584564, "learning_rate": 4.91421237458194e-06, "loss": 0.1607, "step": 61375 }, { "epoch": 2.42, "grad_norm": 2.157305173574173, "learning_rate": 4.914170568561874e-06, "loss": 0.1512, "step": 61400 }, { "epoch": 2.42, "grad_norm": 1.9317567624460439, "learning_rate": 4.914128762541806e-06, "loss": 0.1569, "step": 61425 }, { "epoch": 2.42, "grad_norm": 2.361584589222622, "learning_rate": 4.91408695652174e-06, "loss": 0.1638, "step": 61450 }, { "epoch": 2.42, "grad_norm": 1.8644040646184097, "learning_rate": 4.914045150501673e-06, "loss": 0.1687, "step": 61475 }, { "epoch": 2.42, "grad_norm": 2.198548588498155, "learning_rate": 4.914003344481606e-06, "loss": 0.154, "step": 61500 }, { "epoch": 2.42, "grad_norm": 2.607394816578382, "learning_rate": 4.913961538461539e-06, "loss": 0.1646, "step": 61525 }, { "epoch": 2.42, "grad_norm": 2.7313264618704025, "learning_rate": 4.913919732441472e-06, "loss": 0.1666, "step": 61550 }, { "epoch": 2.42, "grad_norm": 1.8872158667961136, "learning_rate": 4.913877926421405e-06, "loss": 0.1594, "step": 61575 }, { "epoch": 2.42, "grad_norm": 2.005983861142906, "learning_rate": 4.913836120401338e-06, "loss": 0.1671, "step": 61600 }, { "epoch": 2.42, "grad_norm": 1.6811726737767227, "learning_rate": 4.913794314381271e-06, "loss": 0.1773, "step": 61625 }, { "epoch": 2.43, "grad_norm": 2.4459555545243172, "learning_rate": 4.913752508361204e-06, "loss": 0.1666, "step": 61650 }, { "epoch": 2.43, "grad_norm": 2.1227618361315246, "learning_rate": 4.9137107023411376e-06, "loss": 0.1716, "step": 61675 }, { "epoch": 2.43, "grad_norm": 2.1285845172063067, "learning_rate": 4.91366889632107e-06, "loss": 0.1703, "step": 61700 }, { "epoch": 2.43, "grad_norm": 3.090357456437784, "learning_rate": 4.913627090301004e-06, "loss": 0.1602, "step": 61725 }, { "epoch": 2.43, "grad_norm": 2.0482938851822245, "learning_rate": 4.9135852842809365e-06, "loss": 0.1505, "step": 61750 }, { "epoch": 2.43, "grad_norm": 2.5875726035509103, "learning_rate": 4.91354347826087e-06, "loss": 0.1724, "step": 61775 }, { "epoch": 2.43, "grad_norm": 1.6512222827222043, "learning_rate": 4.913501672240803e-06, "loss": 0.1617, "step": 61800 }, { "epoch": 2.43, "grad_norm": 1.8674513598205074, "learning_rate": 4.913459866220736e-06, "loss": 0.1602, "step": 61825 }, { "epoch": 2.43, "grad_norm": 2.3103847629985763, "learning_rate": 4.913419732441472e-06, "loss": 0.1747, "step": 61850 }, { "epoch": 2.43, "grad_norm": 2.3023261514395763, "learning_rate": 4.913377926421405e-06, "loss": 0.1581, "step": 61875 }, { "epoch": 2.44, "grad_norm": 1.449709317162717, "learning_rate": 4.913336120401338e-06, "loss": 0.1742, "step": 61900 }, { "epoch": 2.44, "grad_norm": 2.2658196543121774, "learning_rate": 4.913294314381271e-06, "loss": 0.1684, "step": 61925 }, { "epoch": 2.44, "grad_norm": 2.2361849806007785, "learning_rate": 4.913252508361205e-06, "loss": 0.1745, "step": 61950 }, { "epoch": 2.44, "grad_norm": 1.8218866051780491, "learning_rate": 4.913210702341137e-06, "loss": 0.1522, "step": 61975 }, { "epoch": 2.44, "grad_norm": 3.1296586951775764, "learning_rate": 4.913168896321071e-06, "loss": 0.1695, "step": 62000 }, { "epoch": 2.44, "grad_norm": 1.7770083377004013, "learning_rate": 4.9131270903010036e-06, "loss": 0.1667, "step": 62025 }, { "epoch": 2.44, "grad_norm": 2.9390687351900326, "learning_rate": 4.913085284280937e-06, "loss": 0.1539, "step": 62050 }, { "epoch": 2.44, "grad_norm": 1.7051591263423855, "learning_rate": 4.91304347826087e-06, "loss": 0.1547, "step": 62075 }, { "epoch": 2.44, "grad_norm": 3.1409256857260597, "learning_rate": 4.913001672240803e-06, "loss": 0.176, "step": 62100 }, { "epoch": 2.44, "grad_norm": 2.794092126918202, "learning_rate": 4.912959866220736e-06, "loss": 0.1676, "step": 62125 }, { "epoch": 2.45, "grad_norm": 2.165282938697665, "learning_rate": 4.91291806020067e-06, "loss": 0.1711, "step": 62150 }, { "epoch": 2.45, "grad_norm": 1.8794889473406264, "learning_rate": 4.912876254180602e-06, "loss": 0.1683, "step": 62175 }, { "epoch": 2.45, "grad_norm": 1.753755884035197, "learning_rate": 4.912834448160536e-06, "loss": 0.1782, "step": 62200 }, { "epoch": 2.45, "grad_norm": 1.691916719938932, "learning_rate": 4.9127926421404685e-06, "loss": 0.1764, "step": 62225 }, { "epoch": 2.45, "grad_norm": 2.4777575790892183, "learning_rate": 4.912750836120402e-06, "loss": 0.1778, "step": 62250 }, { "epoch": 2.45, "grad_norm": 2.082308380129261, "learning_rate": 4.912709030100335e-06, "loss": 0.1734, "step": 62275 }, { "epoch": 2.45, "grad_norm": 2.5276306526812324, "learning_rate": 4.912667224080268e-06, "loss": 0.166, "step": 62300 }, { "epoch": 2.45, "grad_norm": 1.0447729926081506, "learning_rate": 4.912625418060201e-06, "loss": 0.1519, "step": 62325 }, { "epoch": 2.45, "grad_norm": 2.1623636996029796, "learning_rate": 4.912583612040135e-06, "loss": 0.1607, "step": 62350 }, { "epoch": 2.45, "grad_norm": 2.06902259972889, "learning_rate": 4.912541806020067e-06, "loss": 0.1686, "step": 62375 }, { "epoch": 2.45, "grad_norm": 2.4778763891548006, "learning_rate": 4.912500000000001e-06, "loss": 0.1682, "step": 62400 }, { "epoch": 2.46, "grad_norm": 1.9091976161593263, "learning_rate": 4.9124581939799335e-06, "loss": 0.172, "step": 62425 }, { "epoch": 2.46, "grad_norm": 2.913027598613694, "learning_rate": 4.912416387959867e-06, "loss": 0.1601, "step": 62450 }, { "epoch": 2.46, "grad_norm": 2.007501580547803, "learning_rate": 4.9123745819398e-06, "loss": 0.1453, "step": 62475 }, { "epoch": 2.46, "grad_norm": 1.8561817066776358, "learning_rate": 4.9123327759197325e-06, "loss": 0.1705, "step": 62500 }, { "epoch": 2.46, "grad_norm": 2.161749847169862, "learning_rate": 4.912290969899666e-06, "loss": 0.1737, "step": 62525 }, { "epoch": 2.46, "grad_norm": 2.1232595660705242, "learning_rate": 4.912249163879599e-06, "loss": 0.1574, "step": 62550 }, { "epoch": 2.46, "grad_norm": 1.7837035969587884, "learning_rate": 4.912207357859532e-06, "loss": 0.1544, "step": 62575 }, { "epoch": 2.46, "grad_norm": 2.8829737147072323, "learning_rate": 4.912165551839465e-06, "loss": 0.1546, "step": 62600 }, { "epoch": 2.46, "grad_norm": 1.7869648211061147, "learning_rate": 4.9121237458193985e-06, "loss": 0.1605, "step": 62625 }, { "epoch": 2.46, "grad_norm": 2.693915151097951, "learning_rate": 4.912081939799331e-06, "loss": 0.1612, "step": 62650 }, { "epoch": 2.47, "grad_norm": 1.8652334580681769, "learning_rate": 4.912040133779265e-06, "loss": 0.1682, "step": 62675 }, { "epoch": 2.47, "grad_norm": 3.4003444849061477, "learning_rate": 4.9119983277591974e-06, "loss": 0.1905, "step": 62700 }, { "epoch": 2.47, "grad_norm": 2.3689247940924316, "learning_rate": 4.911956521739131e-06, "loss": 0.1614, "step": 62725 }, { "epoch": 2.47, "grad_norm": 2.08385825279254, "learning_rate": 4.911914715719064e-06, "loss": 0.1698, "step": 62750 }, { "epoch": 2.47, "grad_norm": 1.6497607238941634, "learning_rate": 4.911872909698997e-06, "loss": 0.16, "step": 62775 }, { "epoch": 2.47, "grad_norm": 1.5394626117283912, "learning_rate": 4.91183110367893e-06, "loss": 0.1627, "step": 62800 }, { "epoch": 2.47, "grad_norm": 2.20644693606779, "learning_rate": 4.9117892976588635e-06, "loss": 0.1673, "step": 62825 }, { "epoch": 2.47, "grad_norm": 1.0547792480076508, "learning_rate": 4.911747491638796e-06, "loss": 0.1673, "step": 62850 }, { "epoch": 2.47, "grad_norm": 2.143794378453494, "learning_rate": 4.91170568561873e-06, "loss": 0.154, "step": 62875 }, { "epoch": 2.47, "grad_norm": 2.8088862278469335, "learning_rate": 4.911663879598662e-06, "loss": 0.1577, "step": 62900 }, { "epoch": 2.48, "grad_norm": 2.5161033945231814, "learning_rate": 4.911622073578596e-06, "loss": 0.1738, "step": 62925 }, { "epoch": 2.48, "grad_norm": 2.6053312739319967, "learning_rate": 4.911580267558529e-06, "loss": 0.1561, "step": 62950 }, { "epoch": 2.48, "grad_norm": 3.093633337419838, "learning_rate": 4.911538461538462e-06, "loss": 0.1611, "step": 62975 }, { "epoch": 2.48, "grad_norm": 1.5048885696579846, "learning_rate": 4.911496655518395e-06, "loss": 0.1537, "step": 63000 }, { "epoch": 2.48, "grad_norm": 1.4850024515720803, "learning_rate": 4.9114548494983285e-06, "loss": 0.1749, "step": 63025 }, { "epoch": 2.48, "grad_norm": 2.267747673130082, "learning_rate": 4.911413043478261e-06, "loss": 0.1647, "step": 63050 }, { "epoch": 2.48, "grad_norm": 2.013113252947993, "learning_rate": 4.911371237458195e-06, "loss": 0.1516, "step": 63075 }, { "epoch": 2.48, "grad_norm": 1.373805904984799, "learning_rate": 4.911329431438127e-06, "loss": 0.1644, "step": 63100 }, { "epoch": 2.48, "grad_norm": 2.012563965344033, "learning_rate": 4.911287625418061e-06, "loss": 0.1786, "step": 63125 }, { "epoch": 2.48, "grad_norm": 2.510353666710276, "learning_rate": 4.911245819397994e-06, "loss": 0.1585, "step": 63150 }, { "epoch": 2.49, "grad_norm": 1.894923289500306, "learning_rate": 4.911204013377927e-06, "loss": 0.1642, "step": 63175 }, { "epoch": 2.49, "grad_norm": 2.9141467079712533, "learning_rate": 4.91116220735786e-06, "loss": 0.1633, "step": 63200 }, { "epoch": 2.49, "grad_norm": 2.1543105842708106, "learning_rate": 4.911120401337793e-06, "loss": 0.1817, "step": 63225 }, { "epoch": 2.49, "grad_norm": 2.196066091671809, "learning_rate": 4.911078595317726e-06, "loss": 0.1654, "step": 63250 }, { "epoch": 2.49, "grad_norm": 2.574609823964885, "learning_rate": 4.911036789297659e-06, "loss": 0.165, "step": 63275 }, { "epoch": 2.49, "grad_norm": 1.7239441756225606, "learning_rate": 4.910994983277592e-06, "loss": 0.1449, "step": 63300 }, { "epoch": 2.49, "grad_norm": 2.155397841796658, "learning_rate": 4.910953177257525e-06, "loss": 0.1639, "step": 63325 }, { "epoch": 2.49, "grad_norm": 2.0676428516731504, "learning_rate": 4.910911371237459e-06, "loss": 0.1544, "step": 63350 }, { "epoch": 2.49, "grad_norm": 2.908000877442016, "learning_rate": 4.910869565217391e-06, "loss": 0.1663, "step": 63375 }, { "epoch": 2.49, "grad_norm": 1.2534265614289273, "learning_rate": 4.910827759197325e-06, "loss": 0.1467, "step": 63400 }, { "epoch": 2.5, "grad_norm": 2.6717733780311184, "learning_rate": 4.9107859531772576e-06, "loss": 0.1734, "step": 63425 }, { "epoch": 2.5, "grad_norm": 2.115600821956127, "learning_rate": 4.910744147157191e-06, "loss": 0.1538, "step": 63450 }, { "epoch": 2.5, "grad_norm": 1.7077889016665388, "learning_rate": 4.910702341137124e-06, "loss": 0.1645, "step": 63475 }, { "epoch": 2.5, "grad_norm": 2.079355530724143, "learning_rate": 4.910660535117057e-06, "loss": 0.1635, "step": 63500 }, { "epoch": 2.5, "grad_norm": 1.6935640568962875, "learning_rate": 4.91061872909699e-06, "loss": 0.1669, "step": 63525 }, { "epoch": 2.5, "grad_norm": 2.501349861253381, "learning_rate": 4.910576923076924e-06, "loss": 0.1618, "step": 63550 }, { "epoch": 2.5, "grad_norm": 2.258413469914614, "learning_rate": 4.910535117056856e-06, "loss": 0.1594, "step": 63575 }, { "epoch": 2.5, "grad_norm": 3.048614342796082, "learning_rate": 4.91049331103679e-06, "loss": 0.1712, "step": 63600 }, { "epoch": 2.5, "grad_norm": 3.199999060663906, "learning_rate": 4.9104515050167225e-06, "loss": 0.1532, "step": 63625 }, { "epoch": 2.5, "grad_norm": 1.0916681640164947, "learning_rate": 4.910409698996656e-06, "loss": 0.1793, "step": 63650 }, { "epoch": 2.51, "grad_norm": 1.5617092875552199, "learning_rate": 4.910367892976589e-06, "loss": 0.1502, "step": 63675 }, { "epoch": 2.51, "grad_norm": 2.431000733781789, "learning_rate": 4.910326086956522e-06, "loss": 0.1618, "step": 63700 }, { "epoch": 2.51, "grad_norm": 2.03889062924083, "learning_rate": 4.910284280936455e-06, "loss": 0.1408, "step": 63725 }, { "epoch": 2.51, "grad_norm": 3.1713570023080253, "learning_rate": 4.9102424749163886e-06, "loss": 0.1626, "step": 63750 }, { "epoch": 2.51, "grad_norm": 1.4983183955799593, "learning_rate": 4.910200668896321e-06, "loss": 0.1668, "step": 63775 }, { "epoch": 2.51, "grad_norm": 1.588454811441986, "learning_rate": 4.910158862876255e-06, "loss": 0.1438, "step": 63800 }, { "epoch": 2.51, "grad_norm": 1.8740606752064477, "learning_rate": 4.9101170568561875e-06, "loss": 0.1613, "step": 63825 }, { "epoch": 2.51, "grad_norm": 1.863894051691388, "learning_rate": 4.910076923076923e-06, "loss": 0.1711, "step": 63850 }, { "epoch": 2.51, "grad_norm": 2.9926581104511647, "learning_rate": 4.910035117056857e-06, "loss": 0.1759, "step": 63875 }, { "epoch": 2.51, "grad_norm": 1.9004004513452737, "learning_rate": 4.90999331103679e-06, "loss": 0.1798, "step": 63900 }, { "epoch": 2.51, "grad_norm": 2.7288616511230606, "learning_rate": 4.909951505016723e-06, "loss": 0.1736, "step": 63925 }, { "epoch": 2.52, "grad_norm": 2.0606551832809346, "learning_rate": 4.909909698996656e-06, "loss": 0.1632, "step": 63950 }, { "epoch": 2.52, "grad_norm": 2.2478370244128305, "learning_rate": 4.909867892976589e-06, "loss": 0.1657, "step": 63975 }, { "epoch": 2.52, "grad_norm": 1.6587952879693575, "learning_rate": 4.909826086956522e-06, "loss": 0.1754, "step": 64000 }, { "epoch": 2.52, "grad_norm": 2.3087314952369464, "learning_rate": 4.909784280936456e-06, "loss": 0.1749, "step": 64025 }, { "epoch": 2.52, "grad_norm": 3.886888875975629, "learning_rate": 4.909742474916388e-06, "loss": 0.1704, "step": 64050 }, { "epoch": 2.52, "grad_norm": 2.5427882532308765, "learning_rate": 4.909700668896322e-06, "loss": 0.1669, "step": 64075 }, { "epoch": 2.52, "grad_norm": 2.6760075354807005, "learning_rate": 4.9096588628762546e-06, "loss": 0.1585, "step": 64100 }, { "epoch": 2.52, "grad_norm": 2.1830101383040494, "learning_rate": 4.909617056856188e-06, "loss": 0.1607, "step": 64125 }, { "epoch": 2.52, "grad_norm": 2.215983424795264, "learning_rate": 4.909575250836121e-06, "loss": 0.1649, "step": 64150 }, { "epoch": 2.52, "grad_norm": 2.471626141069725, "learning_rate": 4.9095334448160535e-06, "loss": 0.1478, "step": 64175 }, { "epoch": 2.53, "grad_norm": 2.65952247452563, "learning_rate": 4.909491638795987e-06, "loss": 0.1804, "step": 64200 }, { "epoch": 2.53, "grad_norm": 1.6232088149469335, "learning_rate": 4.90944983277592e-06, "loss": 0.1594, "step": 64225 }, { "epoch": 2.53, "grad_norm": 2.030118235294256, "learning_rate": 4.909408026755853e-06, "loss": 0.1521, "step": 64250 }, { "epoch": 2.53, "grad_norm": 2.872547185240935, "learning_rate": 4.909366220735786e-06, "loss": 0.1704, "step": 64275 }, { "epoch": 2.53, "grad_norm": 2.5078969587091784, "learning_rate": 4.9093244147157195e-06, "loss": 0.1573, "step": 64300 }, { "epoch": 2.53, "grad_norm": 1.9992021677403815, "learning_rate": 4.909282608695652e-06, "loss": 0.1498, "step": 64325 }, { "epoch": 2.53, "grad_norm": 2.503697200714061, "learning_rate": 4.909240802675586e-06, "loss": 0.1568, "step": 64350 }, { "epoch": 2.53, "grad_norm": 1.9626698849941484, "learning_rate": 4.9091989966555185e-06, "loss": 0.1539, "step": 64375 }, { "epoch": 2.53, "grad_norm": 2.490978407286913, "learning_rate": 4.909157190635452e-06, "loss": 0.1551, "step": 64400 }, { "epoch": 2.53, "grad_norm": 1.783432543381823, "learning_rate": 4.909115384615385e-06, "loss": 0.1824, "step": 64425 }, { "epoch": 2.54, "grad_norm": 2.0354994054583373, "learning_rate": 4.909073578595318e-06, "loss": 0.1626, "step": 64450 }, { "epoch": 2.54, "grad_norm": 2.9963627176197933, "learning_rate": 4.909031772575251e-06, "loss": 0.1672, "step": 64475 }, { "epoch": 2.54, "grad_norm": 2.57838119622678, "learning_rate": 4.9089899665551845e-06, "loss": 0.1816, "step": 64500 }, { "epoch": 2.54, "grad_norm": 2.4899750928360356, "learning_rate": 4.908948160535117e-06, "loss": 0.1634, "step": 64525 }, { "epoch": 2.54, "grad_norm": 2.1326207915213065, "learning_rate": 4.908906354515051e-06, "loss": 0.163, "step": 64550 }, { "epoch": 2.54, "grad_norm": 1.7329610880996327, "learning_rate": 4.9088645484949835e-06, "loss": 0.1631, "step": 64575 }, { "epoch": 2.54, "grad_norm": 1.597797070784199, "learning_rate": 4.908822742474917e-06, "loss": 0.1481, "step": 64600 }, { "epoch": 2.54, "grad_norm": 1.593120783537612, "learning_rate": 4.90878093645485e-06, "loss": 0.1619, "step": 64625 }, { "epoch": 2.54, "grad_norm": 1.3340178656573458, "learning_rate": 4.908739130434783e-06, "loss": 0.1619, "step": 64650 }, { "epoch": 2.54, "grad_norm": 3.027825908379043, "learning_rate": 4.908697324414716e-06, "loss": 0.1784, "step": 64675 }, { "epoch": 2.55, "grad_norm": 2.3061907558400665, "learning_rate": 4.9086555183946495e-06, "loss": 0.1728, "step": 64700 }, { "epoch": 2.55, "grad_norm": 1.8145354193239998, "learning_rate": 4.908613712374582e-06, "loss": 0.1677, "step": 64725 }, { "epoch": 2.55, "grad_norm": 3.2098623467674083, "learning_rate": 4.908571906354516e-06, "loss": 0.1799, "step": 64750 }, { "epoch": 2.55, "grad_norm": 1.9804590850895585, "learning_rate": 4.9085301003344484e-06, "loss": 0.1655, "step": 64775 }, { "epoch": 2.55, "grad_norm": 2.647426146096992, "learning_rate": 4.908488294314382e-06, "loss": 0.1464, "step": 64800 }, { "epoch": 2.55, "grad_norm": 2.9055118736851995, "learning_rate": 4.908446488294315e-06, "loss": 0.1771, "step": 64825 }, { "epoch": 2.55, "grad_norm": 1.8514368286597422, "learning_rate": 4.9084063545150505e-06, "loss": 0.174, "step": 64850 }, { "epoch": 2.55, "grad_norm": 2.9887629605918113, "learning_rate": 4.908364548494984e-06, "loss": 0.1621, "step": 64875 }, { "epoch": 2.55, "grad_norm": 2.4961759409869595, "learning_rate": 4.908322742474917e-06, "loss": 0.1472, "step": 64900 }, { "epoch": 2.55, "grad_norm": 1.4943853813301795, "learning_rate": 4.90828093645485e-06, "loss": 0.159, "step": 64925 }, { "epoch": 2.56, "grad_norm": 2.1198505134386583, "learning_rate": 4.908239130434783e-06, "loss": 0.1427, "step": 64950 }, { "epoch": 2.56, "grad_norm": 1.9455591466937527, "learning_rate": 4.9081973244147165e-06, "loss": 0.1655, "step": 64975 }, { "epoch": 2.56, "grad_norm": 2.276710046339653, "learning_rate": 4.908155518394649e-06, "loss": 0.1622, "step": 65000 }, { "epoch": 2.56, "grad_norm": 2.112276112652573, "learning_rate": 4.908113712374583e-06, "loss": 0.1697, "step": 65025 }, { "epoch": 2.56, "grad_norm": 2.465633305808334, "learning_rate": 4.9080719063545155e-06, "loss": 0.154, "step": 65050 }, { "epoch": 2.56, "grad_norm": 2.393310551281838, "learning_rate": 4.908030100334449e-06, "loss": 0.1471, "step": 65075 }, { "epoch": 2.56, "grad_norm": 2.4821237816549835, "learning_rate": 4.907988294314382e-06, "loss": 0.1585, "step": 65100 }, { "epoch": 2.56, "grad_norm": 2.3884124180340605, "learning_rate": 4.9079464882943144e-06, "loss": 0.1573, "step": 65125 }, { "epoch": 2.56, "grad_norm": 2.8117247832441796, "learning_rate": 4.907904682274248e-06, "loss": 0.1629, "step": 65150 }, { "epoch": 2.56, "grad_norm": 2.8371308882763624, "learning_rate": 4.907862876254181e-06, "loss": 0.164, "step": 65175 }, { "epoch": 2.57, "grad_norm": 2.677839446744097, "learning_rate": 4.907821070234114e-06, "loss": 0.1646, "step": 65200 }, { "epoch": 2.57, "grad_norm": 2.1656191975374735, "learning_rate": 4.907779264214047e-06, "loss": 0.1666, "step": 65225 }, { "epoch": 2.57, "grad_norm": 1.9170574359950836, "learning_rate": 4.9077374581939805e-06, "loss": 0.1752, "step": 65250 }, { "epoch": 2.57, "grad_norm": 2.2651721052092806, "learning_rate": 4.907695652173913e-06, "loss": 0.1652, "step": 65275 }, { "epoch": 2.57, "grad_norm": 2.848088945866107, "learning_rate": 4.907653846153847e-06, "loss": 0.1677, "step": 65300 }, { "epoch": 2.57, "grad_norm": 2.3172246339739884, "learning_rate": 4.907612040133779e-06, "loss": 0.1718, "step": 65325 }, { "epoch": 2.57, "grad_norm": 2.8405472169057213, "learning_rate": 4.907570234113713e-06, "loss": 0.1616, "step": 65350 }, { "epoch": 2.57, "grad_norm": 1.917029554946475, "learning_rate": 4.907528428093646e-06, "loss": 0.1669, "step": 65375 }, { "epoch": 2.57, "grad_norm": 2.2302027109018785, "learning_rate": 4.907486622073579e-06, "loss": 0.1707, "step": 65400 }, { "epoch": 2.57, "grad_norm": 2.1756400588268625, "learning_rate": 4.907444816053512e-06, "loss": 0.1797, "step": 65425 }, { "epoch": 2.57, "grad_norm": 2.441329759624097, "learning_rate": 4.9074030100334454e-06, "loss": 0.1632, "step": 65450 }, { "epoch": 2.58, "grad_norm": 2.6632498992499993, "learning_rate": 4.907361204013378e-06, "loss": 0.185, "step": 65475 }, { "epoch": 2.58, "grad_norm": 1.7352876275637894, "learning_rate": 4.907319397993312e-06, "loss": 0.1627, "step": 65500 }, { "epoch": 2.58, "grad_norm": 2.4227494238068776, "learning_rate": 4.907277591973244e-06, "loss": 0.1605, "step": 65525 }, { "epoch": 2.58, "grad_norm": 2.076807380471068, "learning_rate": 4.907235785953178e-06, "loss": 0.1692, "step": 65550 }, { "epoch": 2.58, "grad_norm": 1.9389772507642347, "learning_rate": 4.907193979933111e-06, "loss": 0.1554, "step": 65575 }, { "epoch": 2.58, "grad_norm": 2.548984641939041, "learning_rate": 4.907152173913044e-06, "loss": 0.1627, "step": 65600 }, { "epoch": 2.58, "grad_norm": 2.6788514190264143, "learning_rate": 4.907110367892977e-06, "loss": 0.1436, "step": 65625 }, { "epoch": 2.58, "grad_norm": 3.1995893686446286, "learning_rate": 4.90706856187291e-06, "loss": 0.178, "step": 65650 }, { "epoch": 2.58, "grad_norm": 1.6593985754679415, "learning_rate": 4.907026755852843e-06, "loss": 0.1685, "step": 65675 }, { "epoch": 2.58, "grad_norm": 2.317242944867583, "learning_rate": 4.906984949832777e-06, "loss": 0.1656, "step": 65700 }, { "epoch": 2.59, "grad_norm": 2.8102006179505628, "learning_rate": 4.906943143812709e-06, "loss": 0.1587, "step": 65725 }, { "epoch": 2.59, "grad_norm": 2.201863264339552, "learning_rate": 4.906901337792643e-06, "loss": 0.1607, "step": 65750 }, { "epoch": 2.59, "grad_norm": 1.702427793526987, "learning_rate": 4.906859531772576e-06, "loss": 0.1755, "step": 65775 }, { "epoch": 2.59, "grad_norm": 2.6686775607203446, "learning_rate": 4.906817725752509e-06, "loss": 0.1396, "step": 65800 }, { "epoch": 2.59, "grad_norm": 2.6544320471662886, "learning_rate": 4.906775919732442e-06, "loss": 0.1568, "step": 65825 }, { "epoch": 2.59, "grad_norm": 0.7099841241182607, "learning_rate": 4.906735785953178e-06, "loss": 0.1543, "step": 65850 }, { "epoch": 2.59, "grad_norm": 1.9112198873388229, "learning_rate": 4.906693979933111e-06, "loss": 0.1526, "step": 65875 }, { "epoch": 2.59, "grad_norm": 2.3657780976079748, "learning_rate": 4.906652173913044e-06, "loss": 0.1566, "step": 65900 }, { "epoch": 2.59, "grad_norm": 1.9480778857317624, "learning_rate": 4.9066103678929775e-06, "loss": 0.1585, "step": 65925 }, { "epoch": 2.59, "grad_norm": 2.942927544287174, "learning_rate": 4.90656856187291e-06, "loss": 0.1671, "step": 65950 }, { "epoch": 2.6, "grad_norm": 1.9627043489734612, "learning_rate": 4.906526755852844e-06, "loss": 0.1725, "step": 65975 }, { "epoch": 2.6, "grad_norm": 1.696701666683548, "learning_rate": 4.906486622073579e-06, "loss": 0.1576, "step": 66000 }, { "epoch": 2.6, "grad_norm": 1.4217429930751502, "learning_rate": 4.906444816053512e-06, "loss": 0.164, "step": 66025 }, { "epoch": 2.6, "grad_norm": 2.549382249171763, "learning_rate": 4.906403010033445e-06, "loss": 0.1618, "step": 66050 }, { "epoch": 2.6, "grad_norm": 2.2060418166923053, "learning_rate": 4.9063612040133785e-06, "loss": 0.1752, "step": 66075 }, { "epoch": 2.6, "grad_norm": 2.526241507559611, "learning_rate": 4.906319397993311e-06, "loss": 0.1661, "step": 66100 }, { "epoch": 2.6, "grad_norm": 2.024390593256092, "learning_rate": 4.906277591973245e-06, "loss": 0.1674, "step": 66125 }, { "epoch": 2.6, "grad_norm": 2.5548120313238436, "learning_rate": 4.9062357859531774e-06, "loss": 0.1555, "step": 66150 }, { "epoch": 2.6, "grad_norm": 1.7203300785919162, "learning_rate": 4.906193979933111e-06, "loss": 0.1654, "step": 66175 }, { "epoch": 2.6, "grad_norm": 2.1977595422357936, "learning_rate": 4.9061521739130445e-06, "loss": 0.1771, "step": 66200 }, { "epoch": 2.61, "grad_norm": 1.7535093547919756, "learning_rate": 4.906110367892977e-06, "loss": 0.1603, "step": 66225 }, { "epoch": 2.61, "grad_norm": 2.154775437570024, "learning_rate": 4.906068561872911e-06, "loss": 0.1671, "step": 66250 }, { "epoch": 2.61, "grad_norm": 2.957733586615425, "learning_rate": 4.906026755852843e-06, "loss": 0.1551, "step": 66275 }, { "epoch": 2.61, "grad_norm": 2.017968097634726, "learning_rate": 4.905984949832776e-06, "loss": 0.1718, "step": 66300 }, { "epoch": 2.61, "grad_norm": 2.1178933664306134, "learning_rate": 4.905943143812709e-06, "loss": 0.1418, "step": 66325 }, { "epoch": 2.61, "grad_norm": 2.2771629555356636, "learning_rate": 4.905901337792642e-06, "loss": 0.1591, "step": 66350 }, { "epoch": 2.61, "grad_norm": 2.114269650050633, "learning_rate": 4.905859531772575e-06, "loss": 0.157, "step": 66375 }, { "epoch": 2.61, "grad_norm": 1.8226274914884266, "learning_rate": 4.905817725752509e-06, "loss": 0.1586, "step": 66400 }, { "epoch": 2.61, "grad_norm": 2.2937073243363506, "learning_rate": 4.905775919732441e-06, "loss": 0.1605, "step": 66425 }, { "epoch": 2.61, "grad_norm": 1.521627303225354, "learning_rate": 4.905734113712375e-06, "loss": 0.1731, "step": 66450 }, { "epoch": 2.62, "grad_norm": 2.1538938575588253, "learning_rate": 4.905692307692308e-06, "loss": 0.1662, "step": 66475 }, { "epoch": 2.62, "grad_norm": 2.1191327411063714, "learning_rate": 4.905650501672241e-06, "loss": 0.1563, "step": 66500 }, { "epoch": 2.62, "grad_norm": 1.9074138182924498, "learning_rate": 4.905608695652174e-06, "loss": 0.155, "step": 66525 }, { "epoch": 2.62, "grad_norm": 2.914625498132579, "learning_rate": 4.905566889632107e-06, "loss": 0.1624, "step": 66550 }, { "epoch": 2.62, "grad_norm": 2.6539242263530385, "learning_rate": 4.90552508361204e-06, "loss": 0.1714, "step": 66575 }, { "epoch": 2.62, "grad_norm": 2.088674751899696, "learning_rate": 4.905483277591974e-06, "loss": 0.1714, "step": 66600 }, { "epoch": 2.62, "grad_norm": 2.2135373112639694, "learning_rate": 4.905441471571907e-06, "loss": 0.1584, "step": 66625 }, { "epoch": 2.62, "grad_norm": 1.4192960242700194, "learning_rate": 4.90539966555184e-06, "loss": 0.1591, "step": 66650 }, { "epoch": 2.62, "grad_norm": 1.7390386910080664, "learning_rate": 4.905357859531773e-06, "loss": 0.1769, "step": 66675 }, { "epoch": 2.62, "grad_norm": 2.232094694142498, "learning_rate": 4.905316053511706e-06, "loss": 0.1455, "step": 66700 }, { "epoch": 2.63, "grad_norm": 3.0138843173390297, "learning_rate": 4.90527424749164e-06, "loss": 0.1514, "step": 66725 }, { "epoch": 2.63, "grad_norm": 2.0070506284026393, "learning_rate": 4.905232441471572e-06, "loss": 0.1866, "step": 66750 }, { "epoch": 2.63, "grad_norm": 1.4365348345881692, "learning_rate": 4.905190635451506e-06, "loss": 0.1739, "step": 66775 }, { "epoch": 2.63, "grad_norm": 2.0958303312111766, "learning_rate": 4.905148829431439e-06, "loss": 0.1554, "step": 66800 }, { "epoch": 2.63, "grad_norm": 2.4725145319946504, "learning_rate": 4.905107023411372e-06, "loss": 0.1675, "step": 66825 }, { "epoch": 2.63, "grad_norm": 2.718854874162693, "learning_rate": 4.905065217391305e-06, "loss": 0.1548, "step": 66850 }, { "epoch": 2.63, "grad_norm": 3.0116226602644884, "learning_rate": 4.905023411371238e-06, "loss": 0.1478, "step": 66875 }, { "epoch": 2.63, "grad_norm": 2.8899799082771738, "learning_rate": 4.904981605351171e-06, "loss": 0.161, "step": 66900 }, { "epoch": 2.63, "grad_norm": 2.3658188452538966, "learning_rate": 4.904939799331105e-06, "loss": 0.1599, "step": 66925 }, { "epoch": 2.63, "grad_norm": 2.509051426616327, "learning_rate": 4.904897993311037e-06, "loss": 0.1614, "step": 66950 }, { "epoch": 2.63, "grad_norm": 3.211165300889533, "learning_rate": 4.904856187290971e-06, "loss": 0.1553, "step": 66975 }, { "epoch": 2.64, "grad_norm": 1.6733992628510892, "learning_rate": 4.904814381270903e-06, "loss": 0.1711, "step": 67000 }, { "epoch": 2.64, "grad_norm": 2.3085595836756876, "learning_rate": 4.904772575250836e-06, "loss": 0.167, "step": 67025 }, { "epoch": 2.64, "grad_norm": 2.9511587121489784, "learning_rate": 4.90473076923077e-06, "loss": 0.1802, "step": 67050 }, { "epoch": 2.64, "grad_norm": 1.7009959258288003, "learning_rate": 4.9046889632107025e-06, "loss": 0.1695, "step": 67075 }, { "epoch": 2.64, "grad_norm": 1.861255834759058, "learning_rate": 4.904647157190636e-06, "loss": 0.1708, "step": 67100 }, { "epoch": 2.64, "grad_norm": 2.332246734287938, "learning_rate": 4.904605351170569e-06, "loss": 0.1647, "step": 67125 }, { "epoch": 2.64, "grad_norm": 3.0582558350326523, "learning_rate": 4.904563545150502e-06, "loss": 0.1568, "step": 67150 }, { "epoch": 2.64, "grad_norm": 2.838261030151132, "learning_rate": 4.904521739130435e-06, "loss": 0.1527, "step": 67175 }, { "epoch": 2.64, "grad_norm": 2.9315235968632813, "learning_rate": 4.9044799331103686e-06, "loss": 0.1829, "step": 67200 }, { "epoch": 2.64, "grad_norm": 2.167459167955874, "learning_rate": 4.904438127090301e-06, "loss": 0.1524, "step": 67225 }, { "epoch": 2.65, "grad_norm": 3.8651054355321226, "learning_rate": 4.904396321070235e-06, "loss": 0.1765, "step": 67250 }, { "epoch": 2.65, "grad_norm": 1.974004440932649, "learning_rate": 4.9043545150501675e-06, "loss": 0.179, "step": 67275 }, { "epoch": 2.65, "grad_norm": 1.7036311477514448, "learning_rate": 4.904312709030101e-06, "loss": 0.1502, "step": 67300 }, { "epoch": 2.65, "grad_norm": 2.3390162169708817, "learning_rate": 4.904270903010034e-06, "loss": 0.1632, "step": 67325 }, { "epoch": 2.65, "grad_norm": 2.763794342227092, "learning_rate": 4.904229096989967e-06, "loss": 0.177, "step": 67350 }, { "epoch": 2.65, "grad_norm": 2.5359226898254685, "learning_rate": 4.9041872909699e-06, "loss": 0.1675, "step": 67375 }, { "epoch": 2.65, "grad_norm": 3.1775710087532154, "learning_rate": 4.9041454849498335e-06, "loss": 0.1738, "step": 67400 }, { "epoch": 2.65, "grad_norm": 2.345033829775269, "learning_rate": 4.904103678929766e-06, "loss": 0.1675, "step": 67425 }, { "epoch": 2.65, "grad_norm": 2.336885577490091, "learning_rate": 4.9040618729097e-06, "loss": 0.1517, "step": 67450 }, { "epoch": 2.65, "grad_norm": 2.4565329151898867, "learning_rate": 4.9040200668896325e-06, "loss": 0.1669, "step": 67475 }, { "epoch": 2.66, "grad_norm": 2.5585596954556937, "learning_rate": 4.903978260869566e-06, "loss": 0.1685, "step": 67500 }, { "epoch": 2.66, "grad_norm": 1.7937763982225357, "learning_rate": 4.903936454849499e-06, "loss": 0.1479, "step": 67525 }, { "epoch": 2.66, "grad_norm": 1.6019104450697434, "learning_rate": 4.903894648829432e-06, "loss": 0.1623, "step": 67550 }, { "epoch": 2.66, "grad_norm": 2.4719412168073753, "learning_rate": 4.903852842809365e-06, "loss": 0.1754, "step": 67575 }, { "epoch": 2.66, "grad_norm": 2.370947438562601, "learning_rate": 4.9038110367892985e-06, "loss": 0.1644, "step": 67600 }, { "epoch": 2.66, "grad_norm": 3.131913579404752, "learning_rate": 4.903769230769231e-06, "loss": 0.169, "step": 67625 }, { "epoch": 2.66, "grad_norm": 1.743261368460137, "learning_rate": 4.903727424749165e-06, "loss": 0.1668, "step": 67650 }, { "epoch": 2.66, "grad_norm": 1.3480808608244685, "learning_rate": 4.9036856187290975e-06, "loss": 0.164, "step": 67675 }, { "epoch": 2.66, "grad_norm": 1.8534317999026895, "learning_rate": 4.90364381270903e-06, "loss": 0.1497, "step": 67700 }, { "epoch": 2.66, "grad_norm": 2.960335057951582, "learning_rate": 4.903602006688964e-06, "loss": 0.1727, "step": 67725 }, { "epoch": 2.67, "grad_norm": 2.423990432992723, "learning_rate": 4.903560200668896e-06, "loss": 0.1529, "step": 67750 }, { "epoch": 2.67, "grad_norm": 2.0462023472811737, "learning_rate": 4.90351839464883e-06, "loss": 0.1727, "step": 67775 }, { "epoch": 2.67, "grad_norm": 2.6099807901854786, "learning_rate": 4.903476588628763e-06, "loss": 0.1578, "step": 67800 }, { "epoch": 2.67, "grad_norm": 2.170286265230943, "learning_rate": 4.903434782608696e-06, "loss": 0.1584, "step": 67825 }, { "epoch": 2.67, "grad_norm": 2.3937380773545813, "learning_rate": 4.903392976588629e-06, "loss": 0.1791, "step": 67850 }, { "epoch": 2.67, "grad_norm": 1.5374488563219373, "learning_rate": 4.9033511705685624e-06, "loss": 0.1427, "step": 67875 }, { "epoch": 2.67, "grad_norm": 3.2583070199750472, "learning_rate": 4.903309364548495e-06, "loss": 0.1636, "step": 67900 }, { "epoch": 2.67, "grad_norm": 1.8468048091641243, "learning_rate": 4.903267558528429e-06, "loss": 0.1519, "step": 67925 }, { "epoch": 2.67, "grad_norm": 2.6291588159527373, "learning_rate": 4.903225752508361e-06, "loss": 0.1758, "step": 67950 }, { "epoch": 2.67, "grad_norm": 3.7050360639335347, "learning_rate": 4.903183946488295e-06, "loss": 0.1749, "step": 67975 }, { "epoch": 2.68, "grad_norm": 1.7682431370811424, "learning_rate": 4.90314381270903e-06, "loss": 0.1573, "step": 68000 }, { "epoch": 2.68, "grad_norm": 2.3824688003115226, "learning_rate": 4.9031020066889635e-06, "loss": 0.1468, "step": 68025 }, { "epoch": 2.68, "grad_norm": 1.7485707314388916, "learning_rate": 4.903060200668896e-06, "loss": 0.1615, "step": 68050 }, { "epoch": 2.68, "grad_norm": 1.7128641702363026, "learning_rate": 4.90301839464883e-06, "loss": 0.1592, "step": 68075 }, { "epoch": 2.68, "grad_norm": 2.075482147796586, "learning_rate": 4.902976588628762e-06, "loss": 0.1352, "step": 68100 }, { "epoch": 2.68, "grad_norm": 1.6209920469074521, "learning_rate": 4.902934782608696e-06, "loss": 0.1712, "step": 68125 }, { "epoch": 2.68, "grad_norm": 2.2059154225617035, "learning_rate": 4.902892976588629e-06, "loss": 0.1582, "step": 68150 }, { "epoch": 2.68, "grad_norm": 1.5679563332864526, "learning_rate": 4.902851170568562e-06, "loss": 0.1644, "step": 68175 }, { "epoch": 2.68, "grad_norm": 1.3764731345936694, "learning_rate": 4.902811036789298e-06, "loss": 0.1656, "step": 68200 }, { "epoch": 2.68, "grad_norm": 0.9139328926308926, "learning_rate": 4.902769230769231e-06, "loss": 0.167, "step": 68225 }, { "epoch": 2.68, "grad_norm": 2.2718914414232003, "learning_rate": 4.902727424749164e-06, "loss": 0.1524, "step": 68250 }, { "epoch": 2.69, "grad_norm": 2.0111405659125294, "learning_rate": 4.902685618729097e-06, "loss": 0.1858, "step": 68275 }, { "epoch": 2.69, "grad_norm": 2.2865058081144105, "learning_rate": 4.9026438127090305e-06, "loss": 0.1537, "step": 68300 }, { "epoch": 2.69, "grad_norm": 2.1726475243792116, "learning_rate": 4.902602006688963e-06, "loss": 0.1633, "step": 68325 }, { "epoch": 2.69, "grad_norm": 2.8799278645516595, "learning_rate": 4.902560200668897e-06, "loss": 0.1432, "step": 68350 }, { "epoch": 2.69, "grad_norm": 3.1468444524866888, "learning_rate": 4.9025183946488294e-06, "loss": 0.1729, "step": 68375 }, { "epoch": 2.69, "grad_norm": 1.6869292960241524, "learning_rate": 4.902476588628763e-06, "loss": 0.1692, "step": 68400 }, { "epoch": 2.69, "grad_norm": 2.2943763214968387, "learning_rate": 4.902434782608696e-06, "loss": 0.1523, "step": 68425 }, { "epoch": 2.69, "grad_norm": 0.9078772412947974, "learning_rate": 4.902392976588629e-06, "loss": 0.1489, "step": 68450 }, { "epoch": 2.69, "grad_norm": 2.624638040883813, "learning_rate": 4.902351170568562e-06, "loss": 0.1663, "step": 68475 }, { "epoch": 2.69, "grad_norm": 2.0599344273565725, "learning_rate": 4.9023093645484955e-06, "loss": 0.1476, "step": 68500 }, { "epoch": 2.7, "grad_norm": 2.132918351620154, "learning_rate": 4.902267558528428e-06, "loss": 0.1706, "step": 68525 }, { "epoch": 2.7, "grad_norm": 2.2024926639954443, "learning_rate": 4.902225752508362e-06, "loss": 0.1638, "step": 68550 }, { "epoch": 2.7, "grad_norm": 2.00228229652154, "learning_rate": 4.9021839464882944e-06, "loss": 0.1745, "step": 68575 }, { "epoch": 2.7, "grad_norm": 2.4173513208178314, "learning_rate": 4.902142140468228e-06, "loss": 0.1667, "step": 68600 }, { "epoch": 2.7, "grad_norm": 1.4535956046540373, "learning_rate": 4.902100334448161e-06, "loss": 0.1872, "step": 68625 }, { "epoch": 2.7, "grad_norm": 1.5770697206236601, "learning_rate": 4.902058528428094e-06, "loss": 0.1584, "step": 68650 }, { "epoch": 2.7, "grad_norm": 2.1907711996947206, "learning_rate": 4.902016722408027e-06, "loss": 0.1614, "step": 68675 }, { "epoch": 2.7, "grad_norm": 1.5996563388129679, "learning_rate": 4.9019749163879605e-06, "loss": 0.1528, "step": 68700 }, { "epoch": 2.7, "grad_norm": 1.5217021335901246, "learning_rate": 4.901933110367893e-06, "loss": 0.1583, "step": 68725 }, { "epoch": 2.7, "grad_norm": 2.830636659279085, "learning_rate": 4.901891304347827e-06, "loss": 0.16, "step": 68750 }, { "epoch": 2.71, "grad_norm": 3.132431011223764, "learning_rate": 4.901849498327759e-06, "loss": 0.1755, "step": 68775 }, { "epoch": 2.71, "grad_norm": 2.954810412804257, "learning_rate": 4.901807692307693e-06, "loss": 0.1593, "step": 68800 }, { "epoch": 2.71, "grad_norm": 1.9873971899648006, "learning_rate": 4.901765886287626e-06, "loss": 0.1576, "step": 68825 }, { "epoch": 2.71, "grad_norm": 1.8546844581500215, "learning_rate": 4.901724080267559e-06, "loss": 0.1486, "step": 68850 }, { "epoch": 2.71, "grad_norm": 2.699256024899676, "learning_rate": 4.901682274247492e-06, "loss": 0.1497, "step": 68875 }, { "epoch": 2.71, "grad_norm": 2.1603180169557, "learning_rate": 4.901640468227425e-06, "loss": 0.159, "step": 68900 }, { "epoch": 2.71, "grad_norm": 1.9479327767344141, "learning_rate": 4.901598662207358e-06, "loss": 0.1679, "step": 68925 }, { "epoch": 2.71, "grad_norm": 1.3827271970959603, "learning_rate": 4.901556856187291e-06, "loss": 0.1696, "step": 68950 }, { "epoch": 2.71, "grad_norm": 1.806564358309027, "learning_rate": 4.901515050167224e-06, "loss": 0.1659, "step": 68975 }, { "epoch": 2.71, "grad_norm": 2.0332614028768843, "learning_rate": 4.901473244147157e-06, "loss": 0.174, "step": 69000 }, { "epoch": 2.72, "grad_norm": 2.079579653117786, "learning_rate": 4.901431438127091e-06, "loss": 0.1801, "step": 69025 }, { "epoch": 2.72, "grad_norm": 2.6221209766597116, "learning_rate": 4.901389632107023e-06, "loss": 0.1836, "step": 69050 }, { "epoch": 2.72, "grad_norm": 2.0314819729265583, "learning_rate": 4.901347826086957e-06, "loss": 0.1723, "step": 69075 }, { "epoch": 2.72, "grad_norm": 2.561945317804322, "learning_rate": 4.9013060200668896e-06, "loss": 0.1716, "step": 69100 }, { "epoch": 2.72, "grad_norm": 2.22404829656543, "learning_rate": 4.901264214046823e-06, "loss": 0.1616, "step": 69125 }, { "epoch": 2.72, "grad_norm": 3.0273108921916183, "learning_rate": 4.901222408026756e-06, "loss": 0.1553, "step": 69150 }, { "epoch": 2.72, "grad_norm": 1.9271862459705924, "learning_rate": 4.901180602006689e-06, "loss": 0.1535, "step": 69175 }, { "epoch": 2.72, "grad_norm": 1.8153291263804212, "learning_rate": 4.901138795986622e-06, "loss": 0.167, "step": 69200 }, { "epoch": 2.72, "grad_norm": 3.3250437575275225, "learning_rate": 4.901096989966556e-06, "loss": 0.1723, "step": 69225 }, { "epoch": 2.72, "grad_norm": 2.767320385273223, "learning_rate": 4.901055183946488e-06, "loss": 0.1731, "step": 69250 }, { "epoch": 2.73, "grad_norm": 2.0717377921678923, "learning_rate": 4.901013377926422e-06, "loss": 0.1831, "step": 69275 }, { "epoch": 2.73, "grad_norm": 2.044160851544343, "learning_rate": 4.900971571906355e-06, "loss": 0.1707, "step": 69300 }, { "epoch": 2.73, "grad_norm": 2.3873404711946207, "learning_rate": 4.900929765886288e-06, "loss": 0.1549, "step": 69325 }, { "epoch": 2.73, "grad_norm": 2.2695647905827445, "learning_rate": 4.900887959866222e-06, "loss": 0.1818, "step": 69350 }, { "epoch": 2.73, "grad_norm": 1.4378326071784568, "learning_rate": 4.900846153846154e-06, "loss": 0.1617, "step": 69375 }, { "epoch": 2.73, "grad_norm": 1.9957570406745557, "learning_rate": 4.900804347826088e-06, "loss": 0.1649, "step": 69400 }, { "epoch": 2.73, "grad_norm": 2.2610619693808034, "learning_rate": 4.9007625418060206e-06, "loss": 0.1614, "step": 69425 }, { "epoch": 2.73, "grad_norm": 1.9526855098138778, "learning_rate": 4.900720735785954e-06, "loss": 0.1709, "step": 69450 }, { "epoch": 2.73, "grad_norm": 1.8988605064657225, "learning_rate": 4.900678929765887e-06, "loss": 0.1671, "step": 69475 }, { "epoch": 2.73, "grad_norm": 2.2746147588855834, "learning_rate": 4.90063712374582e-06, "loss": 0.1577, "step": 69500 }, { "epoch": 2.74, "grad_norm": 2.3770617254178164, "learning_rate": 4.900595317725753e-06, "loss": 0.1719, "step": 69525 }, { "epoch": 2.74, "grad_norm": 1.6617922742946107, "learning_rate": 4.900553511705687e-06, "loss": 0.1661, "step": 69550 }, { "epoch": 2.74, "grad_norm": 1.783213648386689, "learning_rate": 4.900511705685619e-06, "loss": 0.1591, "step": 69575 }, { "epoch": 2.74, "grad_norm": 2.981878987936314, "learning_rate": 4.900469899665552e-06, "loss": 0.1658, "step": 69600 }, { "epoch": 2.74, "grad_norm": 1.6476488160209035, "learning_rate": 4.900428093645485e-06, "loss": 0.1536, "step": 69625 }, { "epoch": 2.74, "grad_norm": 2.325940941846499, "learning_rate": 4.900386287625418e-06, "loss": 0.169, "step": 69650 }, { "epoch": 2.74, "grad_norm": 2.4505268528811524, "learning_rate": 4.900344481605351e-06, "loss": 0.1702, "step": 69675 }, { "epoch": 2.74, "grad_norm": 1.9311266368341664, "learning_rate": 4.9003026755852845e-06, "loss": 0.1548, "step": 69700 }, { "epoch": 2.74, "grad_norm": 1.612976220389442, "learning_rate": 4.900260869565218e-06, "loss": 0.1612, "step": 69725 }, { "epoch": 2.74, "grad_norm": 2.8961876952169523, "learning_rate": 4.900219063545151e-06, "loss": 0.1839, "step": 69750 }, { "epoch": 2.74, "grad_norm": 1.7490468429363928, "learning_rate": 4.900177257525084e-06, "loss": 0.1601, "step": 69775 }, { "epoch": 2.75, "grad_norm": 2.701145780751548, "learning_rate": 4.900135451505017e-06, "loss": 0.1826, "step": 69800 }, { "epoch": 2.75, "grad_norm": 1.8448000328926708, "learning_rate": 4.9000936454849505e-06, "loss": 0.1698, "step": 69825 }, { "epoch": 2.75, "grad_norm": 1.6674747031267765, "learning_rate": 4.900051839464883e-06, "loss": 0.1673, "step": 69850 }, { "epoch": 2.75, "grad_norm": 2.9567258027871106, "learning_rate": 4.900010033444817e-06, "loss": 0.1816, "step": 69875 }, { "epoch": 2.75, "grad_norm": 0.9510130741773436, "learning_rate": 4.8999682274247495e-06, "loss": 0.1765, "step": 69900 }, { "epoch": 2.75, "grad_norm": 2.3497872557880353, "learning_rate": 4.899926421404683e-06, "loss": 0.1721, "step": 69925 }, { "epoch": 2.75, "grad_norm": 2.5324795074581137, "learning_rate": 4.899884615384616e-06, "loss": 0.1654, "step": 69950 }, { "epoch": 2.75, "grad_norm": 1.497124922083692, "learning_rate": 4.899842809364549e-06, "loss": 0.1709, "step": 69975 }, { "epoch": 2.75, "grad_norm": 2.0770713159583654, "learning_rate": 4.899801003344482e-06, "loss": 0.1692, "step": 70000 }, { "epoch": 2.75, "eval_loss": 0.60400390625, "eval_runtime": 11576.636, "eval_samples_per_second": 0.818, "eval_steps_per_second": 0.051, "eval_wer": 0.11463410710551293, "step": 70000 }, { "epoch": 2.75, "grad_norm": 3.404522336853661, "learning_rate": 4.8997591973244155e-06, "loss": 0.1641, "step": 70025 }, { "epoch": 2.76, "grad_norm": 2.604856475398737, "learning_rate": 4.899717391304348e-06, "loss": 0.163, "step": 70050 }, { "epoch": 2.76, "grad_norm": 1.9673677744338196, "learning_rate": 4.899675585284282e-06, "loss": 0.1639, "step": 70075 }, { "epoch": 2.76, "grad_norm": 2.522347353607375, "learning_rate": 4.8996337792642144e-06, "loss": 0.1478, "step": 70100 }, { "epoch": 2.76, "grad_norm": 2.0517890231441367, "learning_rate": 4.899591973244148e-06, "loss": 0.1658, "step": 70125 }, { "epoch": 2.76, "grad_norm": 2.0923956047064167, "learning_rate": 4.899550167224081e-06, "loss": 0.1676, "step": 70150 }, { "epoch": 2.76, "grad_norm": 2.301297191893959, "learning_rate": 4.899508361204014e-06, "loss": 0.1651, "step": 70175 }, { "epoch": 2.76, "grad_norm": 2.583427012599905, "learning_rate": 4.899468227424749e-06, "loss": 0.1932, "step": 70200 }, { "epoch": 2.76, "grad_norm": 2.2125202227042093, "learning_rate": 4.899426421404683e-06, "loss": 0.1605, "step": 70225 }, { "epoch": 2.76, "grad_norm": 2.443241511177795, "learning_rate": 4.8993846153846155e-06, "loss": 0.1649, "step": 70250 }, { "epoch": 2.76, "grad_norm": 2.2941758774610634, "learning_rate": 4.899342809364549e-06, "loss": 0.1624, "step": 70275 }, { "epoch": 2.77, "grad_norm": 2.041645811171096, "learning_rate": 4.899301003344482e-06, "loss": 0.1718, "step": 70300 }, { "epoch": 2.77, "grad_norm": 3.4730930100201176, "learning_rate": 4.899259197324415e-06, "loss": 0.175, "step": 70325 }, { "epoch": 2.77, "grad_norm": 2.557520966431368, "learning_rate": 4.899217391304348e-06, "loss": 0.177, "step": 70350 }, { "epoch": 2.77, "grad_norm": 2.0185704278932226, "learning_rate": 4.8991755852842815e-06, "loss": 0.1474, "step": 70375 }, { "epoch": 2.77, "grad_norm": 3.1796598269669145, "learning_rate": 4.899133779264215e-06, "loss": 0.1533, "step": 70400 }, { "epoch": 2.77, "grad_norm": 2.000036451479218, "learning_rate": 4.899091973244148e-06, "loss": 0.1616, "step": 70425 }, { "epoch": 2.77, "grad_norm": 2.416211515477164, "learning_rate": 4.899050167224081e-06, "loss": 0.1909, "step": 70450 }, { "epoch": 2.77, "grad_norm": 2.3646053101731757, "learning_rate": 4.899008361204014e-06, "loss": 0.1586, "step": 70475 }, { "epoch": 2.77, "grad_norm": 2.2500982911348384, "learning_rate": 4.8989665551839475e-06, "loss": 0.169, "step": 70500 }, { "epoch": 2.77, "grad_norm": 1.9615764608426884, "learning_rate": 4.898924749163879e-06, "loss": 0.1608, "step": 70525 }, { "epoch": 2.78, "grad_norm": 2.359861228050328, "learning_rate": 4.898882943143813e-06, "loss": 0.1743, "step": 70550 }, { "epoch": 2.78, "grad_norm": 1.986395507242446, "learning_rate": 4.898841137123746e-06, "loss": 0.1612, "step": 70575 }, { "epoch": 2.78, "grad_norm": 2.650973993750791, "learning_rate": 4.898799331103679e-06, "loss": 0.1907, "step": 70600 }, { "epoch": 2.78, "grad_norm": 1.5054656396604489, "learning_rate": 4.898757525083612e-06, "loss": 0.1614, "step": 70625 }, { "epoch": 2.78, "grad_norm": 2.099185235357897, "learning_rate": 4.898715719063545e-06, "loss": 0.1599, "step": 70650 }, { "epoch": 2.78, "grad_norm": 1.752903980750435, "learning_rate": 4.898673913043478e-06, "loss": 0.1727, "step": 70675 }, { "epoch": 2.78, "grad_norm": 2.215584387164041, "learning_rate": 4.898632107023412e-06, "loss": 0.1555, "step": 70700 }, { "epoch": 2.78, "grad_norm": 2.7082133233014423, "learning_rate": 4.898590301003344e-06, "loss": 0.1586, "step": 70725 }, { "epoch": 2.78, "grad_norm": 1.8680573169122017, "learning_rate": 4.898548494983278e-06, "loss": 0.1922, "step": 70750 }, { "epoch": 2.78, "grad_norm": 1.5818802598899575, "learning_rate": 4.898506688963211e-06, "loss": 0.1632, "step": 70775 }, { "epoch": 2.79, "grad_norm": 2.6198897893488717, "learning_rate": 4.898464882943144e-06, "loss": 0.1765, "step": 70800 }, { "epoch": 2.79, "grad_norm": 2.112835040259786, "learning_rate": 4.898423076923078e-06, "loss": 0.174, "step": 70825 }, { "epoch": 2.79, "grad_norm": 2.5854543693695047, "learning_rate": 4.89838127090301e-06, "loss": 0.1454, "step": 70850 }, { "epoch": 2.79, "grad_norm": 1.339170672832636, "learning_rate": 4.898339464882944e-06, "loss": 0.1358, "step": 70875 }, { "epoch": 2.79, "grad_norm": 1.6926476355982136, "learning_rate": 4.898297658862877e-06, "loss": 0.1496, "step": 70900 }, { "epoch": 2.79, "grad_norm": 1.5075863109608603, "learning_rate": 4.89825585284281e-06, "loss": 0.1552, "step": 70925 }, { "epoch": 2.79, "grad_norm": 2.2298242138770483, "learning_rate": 4.898214046822743e-06, "loss": 0.1658, "step": 70950 }, { "epoch": 2.79, "grad_norm": 2.56403234832974, "learning_rate": 4.8981722408026764e-06, "loss": 0.1713, "step": 70975 }, { "epoch": 2.79, "grad_norm": 2.266105914417862, "learning_rate": 4.898130434782609e-06, "loss": 0.1721, "step": 71000 }, { "epoch": 2.79, "grad_norm": 2.625059941253109, "learning_rate": 4.898088628762543e-06, "loss": 0.1688, "step": 71025 }, { "epoch": 2.8, "grad_norm": 2.192109072550119, "learning_rate": 4.898046822742475e-06, "loss": 0.1651, "step": 71050 }, { "epoch": 2.8, "grad_norm": 4.035841787276603, "learning_rate": 4.898005016722409e-06, "loss": 0.164, "step": 71075 }, { "epoch": 2.8, "grad_norm": 3.4732146936336017, "learning_rate": 4.897963210702342e-06, "loss": 0.1783, "step": 71100 }, { "epoch": 2.8, "grad_norm": 1.9311210420583675, "learning_rate": 4.897921404682275e-06, "loss": 0.1717, "step": 71125 }, { "epoch": 2.8, "grad_norm": 1.8756949688926736, "learning_rate": 4.897879598662208e-06, "loss": 0.1659, "step": 71150 }, { "epoch": 2.8, "grad_norm": 2.422990879289581, "learning_rate": 4.897837792642141e-06, "loss": 0.1725, "step": 71175 }, { "epoch": 2.8, "grad_norm": 2.8900976364859154, "learning_rate": 4.897797658862876e-06, "loss": 0.1763, "step": 71200 }, { "epoch": 2.8, "grad_norm": 2.381504042282298, "learning_rate": 4.89775585284281e-06, "loss": 0.1592, "step": 71225 }, { "epoch": 2.8, "grad_norm": 1.8401869313416392, "learning_rate": 4.897714046822743e-06, "loss": 0.1719, "step": 71250 }, { "epoch": 2.8, "grad_norm": 1.2582691293928905, "learning_rate": 4.897672240802676e-06, "loss": 0.1661, "step": 71275 }, { "epoch": 2.8, "grad_norm": 1.3065157469687312, "learning_rate": 4.897630434782609e-06, "loss": 0.1574, "step": 71300 }, { "epoch": 2.81, "grad_norm": 2.8882287169254925, "learning_rate": 4.8975886287625424e-06, "loss": 0.1781, "step": 71325 }, { "epoch": 2.81, "grad_norm": 1.8066680445158254, "learning_rate": 4.897546822742475e-06, "loss": 0.1514, "step": 71350 }, { "epoch": 2.81, "grad_norm": 2.1135129603595155, "learning_rate": 4.897505016722409e-06, "loss": 0.168, "step": 71375 }, { "epoch": 2.81, "grad_norm": 2.4731969275899046, "learning_rate": 4.897463210702341e-06, "loss": 0.1679, "step": 71400 }, { "epoch": 2.81, "grad_norm": 2.1670796062973263, "learning_rate": 4.897421404682275e-06, "loss": 0.1721, "step": 71425 }, { "epoch": 2.81, "grad_norm": 2.1632396701972914, "learning_rate": 4.897379598662208e-06, "loss": 0.1728, "step": 71450 }, { "epoch": 2.81, "grad_norm": 1.8934608853543096, "learning_rate": 4.89733779264214e-06, "loss": 0.1585, "step": 71475 }, { "epoch": 2.81, "grad_norm": 1.8398068522285902, "learning_rate": 4.897295986622074e-06, "loss": 0.1697, "step": 71500 }, { "epoch": 2.81, "grad_norm": 1.8325654805684302, "learning_rate": 4.8972541806020066e-06, "loss": 0.1704, "step": 71525 }, { "epoch": 2.81, "grad_norm": 2.3199751539353652, "learning_rate": 4.89721237458194e-06, "loss": 0.161, "step": 71550 }, { "epoch": 2.82, "grad_norm": 1.147554620731119, "learning_rate": 4.897170568561873e-06, "loss": 0.1531, "step": 71575 }, { "epoch": 2.82, "grad_norm": 2.0353602477046118, "learning_rate": 4.897128762541806e-06, "loss": 0.163, "step": 71600 }, { "epoch": 2.82, "grad_norm": 2.371029050435062, "learning_rate": 4.897086956521739e-06, "loss": 0.1813, "step": 71625 }, { "epoch": 2.82, "grad_norm": 2.1762205039573836, "learning_rate": 4.897045150501673e-06, "loss": 0.1736, "step": 71650 }, { "epoch": 2.82, "grad_norm": 1.7956537476456738, "learning_rate": 4.897003344481605e-06, "loss": 0.1716, "step": 71675 }, { "epoch": 2.82, "grad_norm": 2.2524746958016393, "learning_rate": 4.896961538461539e-06, "loss": 0.1801, "step": 71700 }, { "epoch": 2.82, "grad_norm": 2.136139836516011, "learning_rate": 4.8969197324414715e-06, "loss": 0.1635, "step": 71725 }, { "epoch": 2.82, "grad_norm": 2.307095824383635, "learning_rate": 4.896877926421405e-06, "loss": 0.1487, "step": 71750 }, { "epoch": 2.82, "grad_norm": 1.9994033511834624, "learning_rate": 4.896836120401338e-06, "loss": 0.1698, "step": 71775 }, { "epoch": 2.82, "grad_norm": 2.0458057349311662, "learning_rate": 4.896794314381271e-06, "loss": 0.1664, "step": 71800 }, { "epoch": 2.83, "grad_norm": 2.7828406872393536, "learning_rate": 4.896752508361204e-06, "loss": 0.1835, "step": 71825 }, { "epoch": 2.83, "grad_norm": 2.3490321431100463, "learning_rate": 4.8967107023411376e-06, "loss": 0.1772, "step": 71850 }, { "epoch": 2.83, "grad_norm": 2.1239436779644456, "learning_rate": 4.89666889632107e-06, "loss": 0.1698, "step": 71875 }, { "epoch": 2.83, "grad_norm": 2.55078053521957, "learning_rate": 4.896627090301004e-06, "loss": 0.1865, "step": 71900 }, { "epoch": 2.83, "grad_norm": 2.0722007158937537, "learning_rate": 4.896585284280937e-06, "loss": 0.1534, "step": 71925 }, { "epoch": 2.83, "grad_norm": 3.2040867285924763, "learning_rate": 4.89654347826087e-06, "loss": 0.1615, "step": 71950 }, { "epoch": 2.83, "grad_norm": 2.2719001769767058, "learning_rate": 4.896501672240804e-06, "loss": 0.16, "step": 71975 }, { "epoch": 2.83, "grad_norm": 2.003118329948211, "learning_rate": 4.896459866220736e-06, "loss": 0.157, "step": 72000 }, { "epoch": 2.83, "grad_norm": 2.933295145712109, "learning_rate": 4.89641806020067e-06, "loss": 0.1549, "step": 72025 }, { "epoch": 2.83, "grad_norm": 1.89512122355957, "learning_rate": 4.8963762541806025e-06, "loss": 0.174, "step": 72050 }, { "epoch": 2.84, "grad_norm": 1.9653910668438528, "learning_rate": 4.896334448160536e-06, "loss": 0.1537, "step": 72075 }, { "epoch": 2.84, "grad_norm": 1.8822786208686735, "learning_rate": 4.896292642140469e-06, "loss": 0.1483, "step": 72100 }, { "epoch": 2.84, "grad_norm": 1.6974680148122512, "learning_rate": 4.896250836120402e-06, "loss": 0.1842, "step": 72125 }, { "epoch": 2.84, "grad_norm": 2.1232240441546324, "learning_rate": 4.896209030100335e-06, "loss": 0.1708, "step": 72150 }, { "epoch": 2.84, "grad_norm": 2.5147761693595014, "learning_rate": 4.896167224080269e-06, "loss": 0.1734, "step": 72175 }, { "epoch": 2.84, "grad_norm": 1.5814979109687117, "learning_rate": 4.8961270903010036e-06, "loss": 0.1753, "step": 72200 }, { "epoch": 2.84, "grad_norm": 1.9271487770120417, "learning_rate": 4.896085284280937e-06, "loss": 0.16, "step": 72225 }, { "epoch": 2.84, "grad_norm": 1.6807905811978427, "learning_rate": 4.89604347826087e-06, "loss": 0.1638, "step": 72250 }, { "epoch": 2.84, "grad_norm": 3.0101811846023243, "learning_rate": 4.896001672240803e-06, "loss": 0.1482, "step": 72275 }, { "epoch": 2.84, "grad_norm": 2.1706275732886335, "learning_rate": 4.895959866220736e-06, "loss": 0.1694, "step": 72300 }, { "epoch": 2.85, "grad_norm": 1.8417541316276584, "learning_rate": 4.89591806020067e-06, "loss": 0.1714, "step": 72325 }, { "epoch": 2.85, "grad_norm": 1.6630468895130261, "learning_rate": 4.895876254180602e-06, "loss": 0.1551, "step": 72350 }, { "epoch": 2.85, "grad_norm": 3.309966479941043, "learning_rate": 4.895834448160536e-06, "loss": 0.1719, "step": 72375 }, { "epoch": 2.85, "grad_norm": 1.7480764555332018, "learning_rate": 4.8957926421404685e-06, "loss": 0.1615, "step": 72400 }, { "epoch": 2.85, "grad_norm": 1.8651253589940062, "learning_rate": 4.895750836120401e-06, "loss": 0.1673, "step": 72425 }, { "epoch": 2.85, "grad_norm": 1.367768522432886, "learning_rate": 4.895709030100335e-06, "loss": 0.1614, "step": 72450 }, { "epoch": 2.85, "grad_norm": 2.47582620859887, "learning_rate": 4.8956672240802675e-06, "loss": 0.1607, "step": 72475 }, { "epoch": 2.85, "grad_norm": 3.023070237184101, "learning_rate": 4.895625418060201e-06, "loss": 0.1719, "step": 72500 }, { "epoch": 2.85, "grad_norm": 2.177541987436765, "learning_rate": 4.895583612040134e-06, "loss": 0.1612, "step": 72525 }, { "epoch": 2.85, "grad_norm": 2.257043109303166, "learning_rate": 4.895541806020067e-06, "loss": 0.1707, "step": 72550 }, { "epoch": 2.86, "grad_norm": 2.0137969172613395, "learning_rate": 4.8955e-06, "loss": 0.1643, "step": 72575 }, { "epoch": 2.86, "grad_norm": 1.9763477092821808, "learning_rate": 4.8954581939799335e-06, "loss": 0.1709, "step": 72600 }, { "epoch": 2.86, "grad_norm": 1.6234222767388702, "learning_rate": 4.895416387959866e-06, "loss": 0.1556, "step": 72625 }, { "epoch": 2.86, "grad_norm": 1.8976361029757107, "learning_rate": 4.8953745819398e-06, "loss": 0.1585, "step": 72650 }, { "epoch": 2.86, "grad_norm": 1.739742768865686, "learning_rate": 4.8953327759197325e-06, "loss": 0.1721, "step": 72675 }, { "epoch": 2.86, "grad_norm": 2.1472365854087303, "learning_rate": 4.895290969899666e-06, "loss": 0.1681, "step": 72700 }, { "epoch": 2.86, "grad_norm": 2.4942342308769527, "learning_rate": 4.895249163879599e-06, "loss": 0.1653, "step": 72725 }, { "epoch": 2.86, "grad_norm": 2.213462965668188, "learning_rate": 4.895207357859532e-06, "loss": 0.1533, "step": 72750 }, { "epoch": 2.86, "grad_norm": 1.757886571286951, "learning_rate": 4.895165551839465e-06, "loss": 0.1628, "step": 72775 }, { "epoch": 2.86, "grad_norm": 3.161382454759714, "learning_rate": 4.8951237458193985e-06, "loss": 0.1746, "step": 72800 }, { "epoch": 2.86, "grad_norm": 2.898760234748562, "learning_rate": 4.895081939799331e-06, "loss": 0.1644, "step": 72825 }, { "epoch": 2.87, "grad_norm": 1.9181331685697647, "learning_rate": 4.895040133779265e-06, "loss": 0.1744, "step": 72850 }, { "epoch": 2.87, "grad_norm": 1.2622034792223826, "learning_rate": 4.8949983277591974e-06, "loss": 0.1627, "step": 72875 }, { "epoch": 2.87, "grad_norm": 2.0311044399909335, "learning_rate": 4.894956521739131e-06, "loss": 0.1644, "step": 72900 }, { "epoch": 2.87, "grad_norm": 1.7264522011793653, "learning_rate": 4.894914715719064e-06, "loss": 0.162, "step": 72925 }, { "epoch": 2.87, "grad_norm": 2.5284533556414153, "learning_rate": 4.894872909698997e-06, "loss": 0.1708, "step": 72950 }, { "epoch": 2.87, "grad_norm": 2.3584564833935526, "learning_rate": 4.89483110367893e-06, "loss": 0.1553, "step": 72975 }, { "epoch": 2.87, "grad_norm": 2.6811106192462244, "learning_rate": 4.8947892976588635e-06, "loss": 0.1768, "step": 73000 }, { "epoch": 2.87, "grad_norm": 2.880359819740993, "learning_rate": 4.894747491638796e-06, "loss": 0.1638, "step": 73025 }, { "epoch": 2.87, "grad_norm": 1.8098189143936174, "learning_rate": 4.89470568561873e-06, "loss": 0.1694, "step": 73050 }, { "epoch": 2.87, "grad_norm": 2.1960220587817347, "learning_rate": 4.894663879598663e-06, "loss": 0.1529, "step": 73075 }, { "epoch": 2.88, "grad_norm": 1.879513441542289, "learning_rate": 4.894622073578596e-06, "loss": 0.1602, "step": 73100 }, { "epoch": 2.88, "grad_norm": 2.354208570461897, "learning_rate": 4.894580267558529e-06, "loss": 0.154, "step": 73125 }, { "epoch": 2.88, "grad_norm": 1.2171654991734855, "learning_rate": 4.894538461538461e-06, "loss": 0.1699, "step": 73150 }, { "epoch": 2.88, "grad_norm": 2.2940717027810593, "learning_rate": 4.894496655518395e-06, "loss": 0.1675, "step": 73175 }, { "epoch": 2.88, "grad_norm": 1.7813725219024068, "learning_rate": 4.894456521739131e-06, "loss": 0.1726, "step": 73200 }, { "epoch": 2.88, "grad_norm": 1.5471775285839873, "learning_rate": 4.894414715719064e-06, "loss": 0.1506, "step": 73225 }, { "epoch": 2.88, "grad_norm": 2.3179111030180164, "learning_rate": 4.894372909698997e-06, "loss": 0.1549, "step": 73250 }, { "epoch": 2.88, "grad_norm": 2.381683834465129, "learning_rate": 4.8943311036789305e-06, "loss": 0.1578, "step": 73275 }, { "epoch": 2.88, "grad_norm": 2.8932283117028095, "learning_rate": 4.894289297658863e-06, "loss": 0.1596, "step": 73300 }, { "epoch": 2.88, "grad_norm": 3.0652628362276526, "learning_rate": 4.894247491638797e-06, "loss": 0.1757, "step": 73325 }, { "epoch": 2.89, "grad_norm": 2.2515578022167846, "learning_rate": 4.8942056856187295e-06, "loss": 0.1531, "step": 73350 }, { "epoch": 2.89, "grad_norm": 1.9302780505682737, "learning_rate": 4.894163879598662e-06, "loss": 0.1751, "step": 73375 }, { "epoch": 2.89, "grad_norm": 2.3296661369560576, "learning_rate": 4.894122073578596e-06, "loss": 0.1579, "step": 73400 }, { "epoch": 2.89, "grad_norm": 2.317270323434963, "learning_rate": 4.894080267558528e-06, "loss": 0.1521, "step": 73425 }, { "epoch": 2.89, "grad_norm": 1.8791542432395285, "learning_rate": 4.894038461538462e-06, "loss": 0.1666, "step": 73450 }, { "epoch": 2.89, "grad_norm": 3.1141465597924354, "learning_rate": 4.893996655518395e-06, "loss": 0.1779, "step": 73475 }, { "epoch": 2.89, "grad_norm": 1.83550212334721, "learning_rate": 4.893954849498328e-06, "loss": 0.1573, "step": 73500 }, { "epoch": 2.89, "grad_norm": 1.869911326816695, "learning_rate": 4.893913043478261e-06, "loss": 0.1505, "step": 73525 }, { "epoch": 2.89, "grad_norm": 1.875677676542583, "learning_rate": 4.8938712374581944e-06, "loss": 0.1749, "step": 73550 }, { "epoch": 2.89, "grad_norm": 2.877401745073335, "learning_rate": 4.893829431438127e-06, "loss": 0.1756, "step": 73575 }, { "epoch": 2.9, "grad_norm": 2.8537228949707183, "learning_rate": 4.893787625418061e-06, "loss": 0.1553, "step": 73600 }, { "epoch": 2.9, "grad_norm": 3.132534812602467, "learning_rate": 4.893745819397993e-06, "loss": 0.1593, "step": 73625 }, { "epoch": 2.9, "grad_norm": 1.5367775675098951, "learning_rate": 4.893704013377927e-06, "loss": 0.1658, "step": 73650 }, { "epoch": 2.9, "grad_norm": 2.51031029076394, "learning_rate": 4.89366220735786e-06, "loss": 0.1599, "step": 73675 }, { "epoch": 2.9, "grad_norm": 1.9305209052598873, "learning_rate": 4.893620401337793e-06, "loss": 0.1642, "step": 73700 }, { "epoch": 2.9, "grad_norm": 1.8180335977528552, "learning_rate": 4.893578595317726e-06, "loss": 0.1697, "step": 73725 }, { "epoch": 2.9, "grad_norm": 1.7564659182371012, "learning_rate": 4.893536789297659e-06, "loss": 0.1408, "step": 73750 }, { "epoch": 2.9, "grad_norm": 2.707073171305364, "learning_rate": 4.893494983277592e-06, "loss": 0.1761, "step": 73775 }, { "epoch": 2.9, "grad_norm": 2.0197959277765496, "learning_rate": 4.893453177257526e-06, "loss": 0.1544, "step": 73800 }, { "epoch": 2.9, "grad_norm": 1.592459155442505, "learning_rate": 4.893411371237458e-06, "loss": 0.169, "step": 73825 }, { "epoch": 2.91, "grad_norm": 2.1128575826279286, "learning_rate": 4.893369565217392e-06, "loss": 0.1821, "step": 73850 }, { "epoch": 2.91, "grad_norm": 2.330641493994605, "learning_rate": 4.893327759197325e-06, "loss": 0.1599, "step": 73875 }, { "epoch": 2.91, "grad_norm": 2.289273202147065, "learning_rate": 4.893285953177258e-06, "loss": 0.1665, "step": 73900 }, { "epoch": 2.91, "grad_norm": 2.3967166690376724, "learning_rate": 4.893244147157191e-06, "loss": 0.1537, "step": 73925 }, { "epoch": 2.91, "grad_norm": 1.7436469370309389, "learning_rate": 4.893202341137124e-06, "loss": 0.1681, "step": 73950 }, { "epoch": 2.91, "grad_norm": 2.593160901601273, "learning_rate": 4.893160535117057e-06, "loss": 0.1549, "step": 73975 }, { "epoch": 2.91, "grad_norm": 1.7292694124000343, "learning_rate": 4.893118729096991e-06, "loss": 0.1704, "step": 74000 }, { "epoch": 2.91, "grad_norm": 2.7513280274403336, "learning_rate": 4.893076923076923e-06, "loss": 0.1762, "step": 74025 }, { "epoch": 2.91, "grad_norm": 2.245015646917485, "learning_rate": 4.893035117056857e-06, "loss": 0.1545, "step": 74050 }, { "epoch": 2.91, "grad_norm": 2.0277778975395058, "learning_rate": 4.89299331103679e-06, "loss": 0.1585, "step": 74075 }, { "epoch": 2.92, "grad_norm": 2.6304291462822813, "learning_rate": 4.892951505016722e-06, "loss": 0.1635, "step": 74100 }, { "epoch": 2.92, "grad_norm": 2.1099036734597085, "learning_rate": 4.892909698996656e-06, "loss": 0.165, "step": 74125 }, { "epoch": 2.92, "grad_norm": 2.3841230259715886, "learning_rate": 4.8928678929765885e-06, "loss": 0.1541, "step": 74150 }, { "epoch": 2.92, "grad_norm": 1.4945382544604393, "learning_rate": 4.892826086956522e-06, "loss": 0.159, "step": 74175 }, { "epoch": 2.92, "grad_norm": 3.160768317692789, "learning_rate": 4.892785953177258e-06, "loss": 0.1554, "step": 74200 }, { "epoch": 2.92, "grad_norm": 2.307190734455112, "learning_rate": 4.8927441471571914e-06, "loss": 0.1572, "step": 74225 }, { "epoch": 2.92, "grad_norm": 2.5864605527863525, "learning_rate": 4.892702341137124e-06, "loss": 0.1478, "step": 74250 }, { "epoch": 2.92, "grad_norm": 2.3159481412550114, "learning_rate": 4.892660535117058e-06, "loss": 0.1712, "step": 74275 }, { "epoch": 2.92, "grad_norm": 1.7017971769327145, "learning_rate": 4.89261872909699e-06, "loss": 0.1576, "step": 74300 }, { "epoch": 2.92, "grad_norm": 2.916033378211683, "learning_rate": 4.892576923076923e-06, "loss": 0.1415, "step": 74325 }, { "epoch": 2.92, "grad_norm": 2.589428720427529, "learning_rate": 4.892535117056857e-06, "loss": 0.1514, "step": 74350 }, { "epoch": 2.93, "grad_norm": 2.4781537139443732, "learning_rate": 4.892493311036789e-06, "loss": 0.1753, "step": 74375 }, { "epoch": 2.93, "grad_norm": 2.087506703608687, "learning_rate": 4.892451505016723e-06, "loss": 0.1665, "step": 74400 }, { "epoch": 2.93, "grad_norm": 1.4517621145289306, "learning_rate": 4.8924096989966556e-06, "loss": 0.1673, "step": 74425 }, { "epoch": 2.93, "grad_norm": 1.549222778728874, "learning_rate": 4.892367892976589e-06, "loss": 0.1623, "step": 74450 }, { "epoch": 2.93, "grad_norm": 2.6905760289098284, "learning_rate": 4.892326086956522e-06, "loss": 0.1688, "step": 74475 }, { "epoch": 2.93, "grad_norm": 1.7294195336741673, "learning_rate": 4.892284280936455e-06, "loss": 0.1792, "step": 74500 }, { "epoch": 2.93, "grad_norm": 2.2205866806833905, "learning_rate": 4.892242474916388e-06, "loss": 0.1703, "step": 74525 }, { "epoch": 2.93, "grad_norm": 1.512048417276666, "learning_rate": 4.892200668896322e-06, "loss": 0.1727, "step": 74550 }, { "epoch": 2.93, "grad_norm": 1.6165734990093756, "learning_rate": 4.892158862876254e-06, "loss": 0.1689, "step": 74575 }, { "epoch": 2.93, "grad_norm": 1.7105503061939633, "learning_rate": 4.892117056856188e-06, "loss": 0.1645, "step": 74600 }, { "epoch": 2.94, "grad_norm": 2.241186103825974, "learning_rate": 4.8920752508361206e-06, "loss": 0.174, "step": 74625 }, { "epoch": 2.94, "grad_norm": 3.0478956139162046, "learning_rate": 4.892033444816054e-06, "loss": 0.1732, "step": 74650 }, { "epoch": 2.94, "grad_norm": 2.2023056589981613, "learning_rate": 4.891991638795987e-06, "loss": 0.1704, "step": 74675 }, { "epoch": 2.94, "grad_norm": 2.1750560553924156, "learning_rate": 4.89194983277592e-06, "loss": 0.1757, "step": 74700 }, { "epoch": 2.94, "grad_norm": 1.7338152881128504, "learning_rate": 4.891908026755853e-06, "loss": 0.1625, "step": 74725 }, { "epoch": 2.94, "grad_norm": 1.5947374991763381, "learning_rate": 4.891866220735787e-06, "loss": 0.1742, "step": 74750 }, { "epoch": 2.94, "grad_norm": 1.9887138661903652, "learning_rate": 4.891824414715719e-06, "loss": 0.1811, "step": 74775 }, { "epoch": 2.94, "grad_norm": 2.436010532096877, "learning_rate": 4.891782608695653e-06, "loss": 0.1537, "step": 74800 }, { "epoch": 2.94, "grad_norm": 2.926286576820829, "learning_rate": 4.8917408026755855e-06, "loss": 0.16, "step": 74825 }, { "epoch": 2.94, "grad_norm": 1.824564621763424, "learning_rate": 4.891698996655519e-06, "loss": 0.1629, "step": 74850 }, { "epoch": 2.95, "grad_norm": 2.683636286974673, "learning_rate": 4.891657190635452e-06, "loss": 0.1557, "step": 74875 }, { "epoch": 2.95, "grad_norm": 2.1385204177371224, "learning_rate": 4.891615384615385e-06, "loss": 0.1675, "step": 74900 }, { "epoch": 2.95, "grad_norm": 1.5571459930052227, "learning_rate": 4.891573578595318e-06, "loss": 0.1468, "step": 74925 }, { "epoch": 2.95, "grad_norm": 2.19393355253032, "learning_rate": 4.8915317725752516e-06, "loss": 0.1668, "step": 74950 }, { "epoch": 2.95, "grad_norm": 2.1955950411829646, "learning_rate": 4.891489966555184e-06, "loss": 0.1705, "step": 74975 }, { "epoch": 2.95, "grad_norm": 1.8842544208995302, "learning_rate": 4.891448160535118e-06, "loss": 0.1853, "step": 75000 }, { "epoch": 2.95, "grad_norm": 1.9456707154959463, "learning_rate": 4.8914063545150505e-06, "loss": 0.1676, "step": 75025 }, { "epoch": 2.95, "grad_norm": 1.720712384593594, "learning_rate": 4.891364548494983e-06, "loss": 0.1691, "step": 75050 }, { "epoch": 2.95, "grad_norm": 2.732269103665272, "learning_rate": 4.891322742474917e-06, "loss": 0.157, "step": 75075 }, { "epoch": 2.95, "grad_norm": 2.093993948698275, "learning_rate": 4.8912809364548494e-06, "loss": 0.1579, "step": 75100 }, { "epoch": 2.96, "grad_norm": 2.2596660812012637, "learning_rate": 4.891239130434783e-06, "loss": 0.1612, "step": 75125 }, { "epoch": 2.96, "grad_norm": 1.607362977009435, "learning_rate": 4.891197324414716e-06, "loss": 0.1539, "step": 75150 }, { "epoch": 2.96, "grad_norm": 1.750020200184701, "learning_rate": 4.891155518394649e-06, "loss": 0.1653, "step": 75175 }, { "epoch": 2.96, "grad_norm": 2.65398912558923, "learning_rate": 4.891115384615385e-06, "loss": 0.1536, "step": 75200 }, { "epoch": 2.96, "grad_norm": 2.8119697714667136, "learning_rate": 4.891073578595319e-06, "loss": 0.1533, "step": 75225 }, { "epoch": 2.96, "grad_norm": 2.249098596865318, "learning_rate": 4.891031772575251e-06, "loss": 0.1636, "step": 75250 }, { "epoch": 2.96, "grad_norm": 2.380834954836527, "learning_rate": 4.890989966555184e-06, "loss": 0.1592, "step": 75275 }, { "epoch": 2.96, "grad_norm": 2.074689075288214, "learning_rate": 4.8909481605351176e-06, "loss": 0.1662, "step": 75300 }, { "epoch": 2.96, "grad_norm": 2.2019387377694404, "learning_rate": 4.89090635451505e-06, "loss": 0.1697, "step": 75325 }, { "epoch": 2.96, "grad_norm": 2.4940970023751756, "learning_rate": 4.890864548494984e-06, "loss": 0.1524, "step": 75350 }, { "epoch": 2.97, "grad_norm": 3.0020918616337213, "learning_rate": 4.8908227424749165e-06, "loss": 0.167, "step": 75375 }, { "epoch": 2.97, "grad_norm": 2.04633813344427, "learning_rate": 4.89078093645485e-06, "loss": 0.1857, "step": 75400 }, { "epoch": 2.97, "grad_norm": 1.8578961402822785, "learning_rate": 4.890739130434783e-06, "loss": 0.1712, "step": 75425 }, { "epoch": 2.97, "grad_norm": 2.568127490750955, "learning_rate": 4.890697324414716e-06, "loss": 0.1824, "step": 75450 }, { "epoch": 2.97, "grad_norm": 1.7273995103916189, "learning_rate": 4.890655518394649e-06, "loss": 0.1633, "step": 75475 }, { "epoch": 2.97, "grad_norm": 1.8705828417286539, "learning_rate": 4.8906137123745825e-06, "loss": 0.1555, "step": 75500 }, { "epoch": 2.97, "grad_norm": 1.2009745533949896, "learning_rate": 4.890571906354515e-06, "loss": 0.1711, "step": 75525 }, { "epoch": 2.97, "grad_norm": 2.4521956369026072, "learning_rate": 4.890530100334449e-06, "loss": 0.1702, "step": 75550 }, { "epoch": 2.97, "grad_norm": 1.6330318191278905, "learning_rate": 4.8904882943143815e-06, "loss": 0.1622, "step": 75575 }, { "epoch": 2.97, "grad_norm": 1.8895480021883273, "learning_rate": 4.890446488294315e-06, "loss": 0.1484, "step": 75600 }, { "epoch": 2.98, "grad_norm": 2.990507862504963, "learning_rate": 4.890404682274248e-06, "loss": 0.1704, "step": 75625 }, { "epoch": 2.98, "grad_norm": 2.254372856378548, "learning_rate": 4.890362876254181e-06, "loss": 0.1781, "step": 75650 }, { "epoch": 2.98, "grad_norm": 2.038112544222414, "learning_rate": 4.890321070234114e-06, "loss": 0.1593, "step": 75675 }, { "epoch": 2.98, "grad_norm": 3.040909300567587, "learning_rate": 4.8902792642140475e-06, "loss": 0.1729, "step": 75700 }, { "epoch": 2.98, "grad_norm": 2.7786710261443406, "learning_rate": 4.89023745819398e-06, "loss": 0.1461, "step": 75725 }, { "epoch": 2.98, "grad_norm": 2.3393555386740212, "learning_rate": 4.890195652173914e-06, "loss": 0.1595, "step": 75750 }, { "epoch": 2.98, "grad_norm": 5.199257311504345, "learning_rate": 4.8901538461538465e-06, "loss": 0.1753, "step": 75775 }, { "epoch": 2.98, "grad_norm": 1.703194333068814, "learning_rate": 4.89011204013378e-06, "loss": 0.1589, "step": 75800 }, { "epoch": 2.98, "grad_norm": 2.780370147699592, "learning_rate": 4.890070234113713e-06, "loss": 0.1715, "step": 75825 }, { "epoch": 2.98, "grad_norm": 2.3580394840206327, "learning_rate": 4.890028428093646e-06, "loss": 0.1559, "step": 75850 }, { "epoch": 2.98, "grad_norm": 2.1728983607540133, "learning_rate": 4.889986622073579e-06, "loss": 0.1591, "step": 75875 }, { "epoch": 2.99, "grad_norm": 2.277580980241663, "learning_rate": 4.8899448160535125e-06, "loss": 0.1659, "step": 75900 }, { "epoch": 2.99, "grad_norm": 2.3986761264261127, "learning_rate": 4.889903010033445e-06, "loss": 0.1672, "step": 75925 }, { "epoch": 2.99, "grad_norm": 1.931152925984652, "learning_rate": 4.889861204013378e-06, "loss": 0.1657, "step": 75950 }, { "epoch": 2.99, "grad_norm": 1.68257502997896, "learning_rate": 4.8898193979933114e-06, "loss": 0.1614, "step": 75975 }, { "epoch": 2.99, "grad_norm": 1.470089170915271, "learning_rate": 4.889777591973244e-06, "loss": 0.1782, "step": 76000 }, { "epoch": 2.99, "grad_norm": 3.2551584773611544, "learning_rate": 4.889735785953178e-06, "loss": 0.1503, "step": 76025 }, { "epoch": 2.99, "grad_norm": 2.1093960304591755, "learning_rate": 4.88969397993311e-06, "loss": 0.1657, "step": 76050 }, { "epoch": 2.99, "grad_norm": 2.672877955706056, "learning_rate": 4.889652173913044e-06, "loss": 0.1649, "step": 76075 }, { "epoch": 2.99, "grad_norm": 3.1470669315639115, "learning_rate": 4.889610367892977e-06, "loss": 0.1746, "step": 76100 }, { "epoch": 2.99, "grad_norm": 3.0579894482513397, "learning_rate": 4.88956856187291e-06, "loss": 0.1657, "step": 76125 }, { "epoch": 3.0, "grad_norm": 2.947301172535142, "learning_rate": 4.889526755852843e-06, "loss": 0.1516, "step": 76150 }, { "epoch": 3.0, "grad_norm": 3.3863874587051725, "learning_rate": 4.889484949832776e-06, "loss": 0.1697, "step": 76175 }, { "epoch": 3.0, "grad_norm": 1.9291008913623715, "learning_rate": 4.889444816053512e-06, "loss": 0.1776, "step": 76200 }, { "epoch": 3.0, "grad_norm": 2.319491892854814, "learning_rate": 4.889403010033445e-06, "loss": 0.1781, "step": 76225 }, { "epoch": 3.0, "grad_norm": 3.488180039037966, "learning_rate": 4.889362876254181e-06, "loss": 0.1857, "step": 76250 }, { "epoch": 3.0, "grad_norm": 2.412768379326752, "learning_rate": 4.889321070234114e-06, "loss": 0.1438, "step": 76275 }, { "epoch": 3.0, "grad_norm": 3.8203948129859513, "learning_rate": 4.889279264214047e-06, "loss": 0.1328, "step": 76300 }, { "epoch": 3.0, "grad_norm": 2.8638845252884635, "learning_rate": 4.8892374581939806e-06, "loss": 0.1195, "step": 76325 }, { "epoch": 3.0, "grad_norm": 3.0186144490512676, "learning_rate": 4.889195652173913e-06, "loss": 0.1231, "step": 76350 }, { "epoch": 3.0, "grad_norm": 2.40564050431268, "learning_rate": 4.889153846153847e-06, "loss": 0.1288, "step": 76375 }, { "epoch": 3.01, "grad_norm": 3.223902399031433, "learning_rate": 4.88911204013378e-06, "loss": 0.1253, "step": 76400 }, { "epoch": 3.01, "grad_norm": 2.524120816578796, "learning_rate": 4.889070234113712e-06, "loss": 0.1186, "step": 76425 }, { "epoch": 3.01, "grad_norm": 4.33608755350878, "learning_rate": 4.889028428093646e-06, "loss": 0.1302, "step": 76450 }, { "epoch": 3.01, "grad_norm": 3.0812374326190763, "learning_rate": 4.8889866220735784e-06, "loss": 0.1154, "step": 76475 }, { "epoch": 3.01, "grad_norm": 3.8611643250629357, "learning_rate": 4.888944816053512e-06, "loss": 0.1182, "step": 76500 }, { "epoch": 3.01, "grad_norm": 4.562201565976749, "learning_rate": 4.888903010033445e-06, "loss": 0.1163, "step": 76525 }, { "epoch": 3.01, "grad_norm": 5.0482039643712335, "learning_rate": 4.888861204013378e-06, "loss": 0.1294, "step": 76550 }, { "epoch": 3.01, "grad_norm": 2.7435608656097723, "learning_rate": 4.888819397993311e-06, "loss": 0.1384, "step": 76575 }, { "epoch": 3.01, "grad_norm": 2.2602926708641857, "learning_rate": 4.8887775919732445e-06, "loss": 0.1116, "step": 76600 }, { "epoch": 3.01, "grad_norm": 5.6874792345843135, "learning_rate": 4.888735785953177e-06, "loss": 0.1224, "step": 76625 }, { "epoch": 3.02, "grad_norm": 2.720275109336373, "learning_rate": 4.888693979933111e-06, "loss": 0.123, "step": 76650 }, { "epoch": 3.02, "grad_norm": 4.395574166086428, "learning_rate": 4.888652173913043e-06, "loss": 0.1168, "step": 76675 }, { "epoch": 3.02, "grad_norm": 2.703916615126388, "learning_rate": 4.888610367892977e-06, "loss": 0.1214, "step": 76700 }, { "epoch": 3.02, "grad_norm": 3.181411929507769, "learning_rate": 4.88856856187291e-06, "loss": 0.1138, "step": 76725 }, { "epoch": 3.02, "grad_norm": 3.509556295339099, "learning_rate": 4.888526755852843e-06, "loss": 0.1232, "step": 76750 }, { "epoch": 3.02, "grad_norm": 3.8902899973523626, "learning_rate": 4.888484949832776e-06, "loss": 0.1212, "step": 76775 }, { "epoch": 3.02, "grad_norm": 2.0686794778491895, "learning_rate": 4.8884431438127095e-06, "loss": 0.123, "step": 76800 }, { "epoch": 3.02, "grad_norm": 3.16510341619209, "learning_rate": 4.888401337792643e-06, "loss": 0.1183, "step": 76825 }, { "epoch": 3.02, "grad_norm": 4.458808729537471, "learning_rate": 4.888359531772576e-06, "loss": 0.1277, "step": 76850 }, { "epoch": 3.02, "grad_norm": 2.6194750814850947, "learning_rate": 4.888317725752509e-06, "loss": 0.1167, "step": 76875 }, { "epoch": 3.03, "grad_norm": 2.4407102227373794, "learning_rate": 4.888275919732442e-06, "loss": 0.1298, "step": 76900 }, { "epoch": 3.03, "grad_norm": 2.6333427547264407, "learning_rate": 4.8882341137123755e-06, "loss": 0.1372, "step": 76925 }, { "epoch": 3.03, "grad_norm": 2.4980445842112227, "learning_rate": 4.888192307692308e-06, "loss": 0.1213, "step": 76950 }, { "epoch": 3.03, "grad_norm": 3.410736806496833, "learning_rate": 4.888150501672242e-06, "loss": 0.1324, "step": 76975 }, { "epoch": 3.03, "grad_norm": 1.9758670528816349, "learning_rate": 4.8881086956521744e-06, "loss": 0.1302, "step": 77000 }, { "epoch": 3.03, "grad_norm": 3.4608232211672334, "learning_rate": 4.888066889632108e-06, "loss": 0.1157, "step": 77025 }, { "epoch": 3.03, "grad_norm": 3.9971640082189137, "learning_rate": 4.888025083612041e-06, "loss": 0.1116, "step": 77050 }, { "epoch": 3.03, "grad_norm": 3.7059743493192046, "learning_rate": 4.887983277591974e-06, "loss": 0.1168, "step": 77075 }, { "epoch": 3.03, "grad_norm": 3.525869534264302, "learning_rate": 4.887941471571907e-06, "loss": 0.1272, "step": 77100 }, { "epoch": 3.03, "grad_norm": 3.3799153930239405, "learning_rate": 4.88789966555184e-06, "loss": 0.1373, "step": 77125 }, { "epoch": 3.04, "grad_norm": 2.979968630998421, "learning_rate": 4.887857859531772e-06, "loss": 0.1173, "step": 77150 }, { "epoch": 3.04, "grad_norm": 3.116452957430253, "learning_rate": 4.887816053511706e-06, "loss": 0.1313, "step": 77175 }, { "epoch": 3.04, "grad_norm": 4.502809671470845, "learning_rate": 4.8877742474916386e-06, "loss": 0.1211, "step": 77200 }, { "epoch": 3.04, "grad_norm": 3.3430787138363804, "learning_rate": 4.887732441471572e-06, "loss": 0.1137, "step": 77225 }, { "epoch": 3.04, "grad_norm": 3.4815832417449935, "learning_rate": 4.887690635451506e-06, "loss": 0.124, "step": 77250 }, { "epoch": 3.04, "grad_norm": 2.8682967863246294, "learning_rate": 4.887648829431438e-06, "loss": 0.1292, "step": 77275 }, { "epoch": 3.04, "grad_norm": 3.5200313476756526, "learning_rate": 4.887607023411372e-06, "loss": 0.1175, "step": 77300 }, { "epoch": 3.04, "grad_norm": 3.7267567794932743, "learning_rate": 4.887565217391305e-06, "loss": 0.1252, "step": 77325 }, { "epoch": 3.04, "grad_norm": 3.9971687272656258, "learning_rate": 4.887523411371238e-06, "loss": 0.1274, "step": 77350 }, { "epoch": 3.04, "grad_norm": 4.970670446361493, "learning_rate": 4.887481605351171e-06, "loss": 0.1329, "step": 77375 }, { "epoch": 3.04, "grad_norm": 2.4841506585451643, "learning_rate": 4.887439799331104e-06, "loss": 0.1214, "step": 77400 }, { "epoch": 3.05, "grad_norm": 4.295568567553117, "learning_rate": 4.887397993311037e-06, "loss": 0.1232, "step": 77425 }, { "epoch": 3.05, "grad_norm": 4.063655366491959, "learning_rate": 4.887356187290971e-06, "loss": 0.1326, "step": 77450 }, { "epoch": 3.05, "grad_norm": 2.8435059530091262, "learning_rate": 4.887314381270903e-06, "loss": 0.1242, "step": 77475 }, { "epoch": 3.05, "grad_norm": 2.1310397377270602, "learning_rate": 4.887272575250837e-06, "loss": 0.1181, "step": 77500 }, { "epoch": 3.05, "grad_norm": 3.3838400346860897, "learning_rate": 4.8872307692307696e-06, "loss": 0.1243, "step": 77525 }, { "epoch": 3.05, "grad_norm": 3.642961468985254, "learning_rate": 4.887188963210703e-06, "loss": 0.1402, "step": 77550 }, { "epoch": 3.05, "grad_norm": 2.593211405878639, "learning_rate": 4.887147157190636e-06, "loss": 0.1219, "step": 77575 }, { "epoch": 3.05, "grad_norm": 4.739286033182659, "learning_rate": 4.887105351170569e-06, "loss": 0.1313, "step": 77600 }, { "epoch": 3.05, "grad_norm": 3.05581489965152, "learning_rate": 4.887063545150502e-06, "loss": 0.1201, "step": 77625 }, { "epoch": 3.05, "grad_norm": 3.0899269653481745, "learning_rate": 4.887021739130436e-06, "loss": 0.1247, "step": 77650 }, { "epoch": 3.06, "grad_norm": 3.758598812637565, "learning_rate": 4.886979933110368e-06, "loss": 0.1213, "step": 77675 }, { "epoch": 3.06, "grad_norm": 3.6037831919337875, "learning_rate": 4.886938127090302e-06, "loss": 0.128, "step": 77700 }, { "epoch": 3.06, "grad_norm": 4.451175536710578, "learning_rate": 4.8868963210702345e-06, "loss": 0.1311, "step": 77725 }, { "epoch": 3.06, "grad_norm": 3.09548461829267, "learning_rate": 4.886854515050168e-06, "loss": 0.1207, "step": 77750 }, { "epoch": 3.06, "grad_norm": 3.689827710615845, "learning_rate": 4.886812709030101e-06, "loss": 0.1152, "step": 77775 }, { "epoch": 3.06, "grad_norm": 3.7771578128301226, "learning_rate": 4.886770903010034e-06, "loss": 0.1242, "step": 77800 }, { "epoch": 3.06, "grad_norm": 4.031573600779883, "learning_rate": 4.886729096989967e-06, "loss": 0.1324, "step": 77825 }, { "epoch": 3.06, "grad_norm": 3.426503558716008, "learning_rate": 4.8866872909699e-06, "loss": 0.1173, "step": 77850 }, { "epoch": 3.06, "grad_norm": 2.4533360047703456, "learning_rate": 4.886645484949833e-06, "loss": 0.1177, "step": 77875 }, { "epoch": 3.06, "grad_norm": 3.6081638322635046, "learning_rate": 4.886603678929766e-06, "loss": 0.1318, "step": 77900 }, { "epoch": 3.07, "grad_norm": 3.651931508167466, "learning_rate": 4.8865618729096995e-06, "loss": 0.1278, "step": 77925 }, { "epoch": 3.07, "grad_norm": 3.133415821529739, "learning_rate": 4.886520066889632e-06, "loss": 0.1227, "step": 77950 }, { "epoch": 3.07, "grad_norm": 2.6432438963375477, "learning_rate": 4.886478260869566e-06, "loss": 0.1226, "step": 77975 }, { "epoch": 3.07, "grad_norm": 4.072468610495336, "learning_rate": 4.8864364548494985e-06, "loss": 0.1336, "step": 78000 }, { "epoch": 3.07, "grad_norm": 3.607977716821972, "learning_rate": 4.886394648829432e-06, "loss": 0.1259, "step": 78025 }, { "epoch": 3.07, "grad_norm": 3.192830764597763, "learning_rate": 4.886352842809365e-06, "loss": 0.1216, "step": 78050 }, { "epoch": 3.07, "grad_norm": 3.583472664299709, "learning_rate": 4.886311036789298e-06, "loss": 0.1296, "step": 78075 }, { "epoch": 3.07, "grad_norm": 3.1812193947409777, "learning_rate": 4.886269230769231e-06, "loss": 0.1129, "step": 78100 }, { "epoch": 3.07, "grad_norm": 3.5681957955883603, "learning_rate": 4.8862274247491645e-06, "loss": 0.1257, "step": 78125 }, { "epoch": 3.07, "grad_norm": 4.3655757139060585, "learning_rate": 4.886185618729097e-06, "loss": 0.1176, "step": 78150 }, { "epoch": 3.08, "grad_norm": 3.627286736907382, "learning_rate": 4.886143812709031e-06, "loss": 0.128, "step": 78175 }, { "epoch": 3.08, "grad_norm": 4.110026082315242, "learning_rate": 4.8861020066889634e-06, "loss": 0.1314, "step": 78200 }, { "epoch": 3.08, "grad_norm": 3.540369512342808, "learning_rate": 4.886060200668897e-06, "loss": 0.1242, "step": 78225 }, { "epoch": 3.08, "grad_norm": 3.0234131517149936, "learning_rate": 4.886020066889632e-06, "loss": 0.1132, "step": 78250 }, { "epoch": 3.08, "grad_norm": 3.4765331389271035, "learning_rate": 4.8859782608695655e-06, "loss": 0.1241, "step": 78275 }, { "epoch": 3.08, "grad_norm": 3.7796806155271914, "learning_rate": 4.885936454849498e-06, "loss": 0.1158, "step": 78300 }, { "epoch": 3.08, "grad_norm": 2.8841477452018838, "learning_rate": 4.885894648829432e-06, "loss": 0.1493, "step": 78325 }, { "epoch": 3.08, "grad_norm": 4.4433914203454625, "learning_rate": 4.885852842809365e-06, "loss": 0.1196, "step": 78350 }, { "epoch": 3.08, "grad_norm": 2.75884085554516, "learning_rate": 4.885811036789298e-06, "loss": 0.1188, "step": 78375 }, { "epoch": 3.08, "grad_norm": 4.273808052650839, "learning_rate": 4.8857692307692316e-06, "loss": 0.1217, "step": 78400 }, { "epoch": 3.09, "grad_norm": 3.897334135564084, "learning_rate": 4.885727424749164e-06, "loss": 0.1286, "step": 78425 }, { "epoch": 3.09, "grad_norm": 4.615939251349839, "learning_rate": 4.885685618729098e-06, "loss": 0.1362, "step": 78450 }, { "epoch": 3.09, "grad_norm": 3.730304960317534, "learning_rate": 4.8856438127090305e-06, "loss": 0.1344, "step": 78475 }, { "epoch": 3.09, "grad_norm": 3.5702453678071944, "learning_rate": 4.885602006688964e-06, "loss": 0.1254, "step": 78500 }, { "epoch": 3.09, "grad_norm": 2.9459198258987245, "learning_rate": 4.885560200668897e-06, "loss": 0.1155, "step": 78525 }, { "epoch": 3.09, "grad_norm": 2.9422269730368202, "learning_rate": 4.88551839464883e-06, "loss": 0.1341, "step": 78550 }, { "epoch": 3.09, "grad_norm": 4.345971398090705, "learning_rate": 4.885476588628763e-06, "loss": 0.125, "step": 78575 }, { "epoch": 3.09, "grad_norm": 3.4203734766216543, "learning_rate": 4.8854347826086965e-06, "loss": 0.1219, "step": 78600 }, { "epoch": 3.09, "grad_norm": 2.7450396553454763, "learning_rate": 4.885392976588629e-06, "loss": 0.1273, "step": 78625 }, { "epoch": 3.09, "grad_norm": 2.6718974934824007, "learning_rate": 4.885351170568563e-06, "loss": 0.1431, "step": 78650 }, { "epoch": 3.1, "grad_norm": 2.853838303655648, "learning_rate": 4.8853093645484955e-06, "loss": 0.125, "step": 78675 }, { "epoch": 3.1, "grad_norm": 2.9709753361358064, "learning_rate": 4.885267558528429e-06, "loss": 0.1244, "step": 78700 }, { "epoch": 3.1, "grad_norm": 4.023857278065838, "learning_rate": 4.885225752508362e-06, "loss": 0.1293, "step": 78725 }, { "epoch": 3.1, "grad_norm": 4.373864745184071, "learning_rate": 4.885183946488295e-06, "loss": 0.1319, "step": 78750 }, { "epoch": 3.1, "grad_norm": 2.508205959991612, "learning_rate": 4.885142140468228e-06, "loss": 0.1273, "step": 78775 }, { "epoch": 3.1, "grad_norm": 2.6411187927711772, "learning_rate": 4.885100334448161e-06, "loss": 0.1311, "step": 78800 }, { "epoch": 3.1, "grad_norm": 3.3179749804960834, "learning_rate": 4.885058528428094e-06, "loss": 0.1206, "step": 78825 }, { "epoch": 3.1, "grad_norm": 3.7366552197499647, "learning_rate": 4.885016722408027e-06, "loss": 0.1246, "step": 78850 }, { "epoch": 3.1, "grad_norm": 3.118992258293641, "learning_rate": 4.8849749163879605e-06, "loss": 0.1126, "step": 78875 }, { "epoch": 3.1, "grad_norm": 2.056430415904338, "learning_rate": 4.884933110367893e-06, "loss": 0.1069, "step": 78900 }, { "epoch": 3.1, "grad_norm": 3.785563939242501, "learning_rate": 4.884891304347827e-06, "loss": 0.1279, "step": 78925 }, { "epoch": 3.11, "grad_norm": 3.522011901121123, "learning_rate": 4.884849498327759e-06, "loss": 0.1221, "step": 78950 }, { "epoch": 3.11, "grad_norm": 2.815780165397175, "learning_rate": 4.884807692307693e-06, "loss": 0.1252, "step": 78975 }, { "epoch": 3.11, "grad_norm": 3.6460270553802965, "learning_rate": 4.884765886287626e-06, "loss": 0.1291, "step": 79000 }, { "epoch": 3.11, "grad_norm": 3.267409610114439, "learning_rate": 4.884724080267559e-06, "loss": 0.131, "step": 79025 }, { "epoch": 3.11, "grad_norm": 3.512502522504111, "learning_rate": 4.884682274247492e-06, "loss": 0.1193, "step": 79050 }, { "epoch": 3.11, "grad_norm": 4.876716295837919, "learning_rate": 4.8846404682274254e-06, "loss": 0.1279, "step": 79075 }, { "epoch": 3.11, "grad_norm": 4.068141572812643, "learning_rate": 4.884598662207358e-06, "loss": 0.1191, "step": 79100 }, { "epoch": 3.11, "grad_norm": 2.57695746518627, "learning_rate": 4.884556856187292e-06, "loss": 0.1218, "step": 79125 }, { "epoch": 3.11, "grad_norm": 2.8362615333679826, "learning_rate": 4.884515050167224e-06, "loss": 0.1169, "step": 79150 }, { "epoch": 3.11, "grad_norm": 3.7181091078142487, "learning_rate": 4.884473244147158e-06, "loss": 0.1271, "step": 79175 }, { "epoch": 3.12, "grad_norm": 2.562858519582811, "learning_rate": 4.884431438127091e-06, "loss": 0.132, "step": 79200 }, { "epoch": 3.12, "grad_norm": 3.226476454470988, "learning_rate": 4.884389632107024e-06, "loss": 0.1202, "step": 79225 }, { "epoch": 3.12, "grad_norm": 3.251373898366226, "learning_rate": 4.884349498327759e-06, "loss": 0.1468, "step": 79250 }, { "epoch": 3.12, "grad_norm": 3.198441179981739, "learning_rate": 4.884307692307693e-06, "loss": 0.1273, "step": 79275 }, { "epoch": 3.12, "grad_norm": 3.0629650236020756, "learning_rate": 4.884265886287625e-06, "loss": 0.1231, "step": 79300 }, { "epoch": 3.12, "grad_norm": 3.2827139225909976, "learning_rate": 4.884224080267559e-06, "loss": 0.1404, "step": 79325 }, { "epoch": 3.12, "grad_norm": 3.144707188115353, "learning_rate": 4.884182274247492e-06, "loss": 0.1298, "step": 79350 }, { "epoch": 3.12, "grad_norm": 2.9421101700328185, "learning_rate": 4.884140468227425e-06, "loss": 0.1405, "step": 79375 }, { "epoch": 3.12, "grad_norm": 4.59682086895325, "learning_rate": 4.884098662207358e-06, "loss": 0.1441, "step": 79400 }, { "epoch": 3.12, "grad_norm": 3.391160492130251, "learning_rate": 4.884056856187291e-06, "loss": 0.1375, "step": 79425 }, { "epoch": 3.13, "grad_norm": 3.180088795295559, "learning_rate": 4.884015050167225e-06, "loss": 0.1287, "step": 79450 }, { "epoch": 3.13, "grad_norm": 4.570107888816917, "learning_rate": 4.883973244147158e-06, "loss": 0.1255, "step": 79475 }, { "epoch": 3.13, "grad_norm": 3.0789019235619275, "learning_rate": 4.883931438127091e-06, "loss": 0.1254, "step": 79500 }, { "epoch": 3.13, "grad_norm": 3.5793084776409003, "learning_rate": 4.883889632107024e-06, "loss": 0.1181, "step": 79525 }, { "epoch": 3.13, "grad_norm": 3.797918053156867, "learning_rate": 4.8838478260869575e-06, "loss": 0.1248, "step": 79550 }, { "epoch": 3.13, "grad_norm": 3.027925691307738, "learning_rate": 4.88380602006689e-06, "loss": 0.1218, "step": 79575 }, { "epoch": 3.13, "grad_norm": 5.0368441312464, "learning_rate": 4.883764214046824e-06, "loss": 0.1157, "step": 79600 }, { "epoch": 3.13, "grad_norm": 3.8288632838026757, "learning_rate": 4.883722408026756e-06, "loss": 0.1156, "step": 79625 }, { "epoch": 3.13, "grad_norm": 2.597067146027184, "learning_rate": 4.88368060200669e-06, "loss": 0.1297, "step": 79650 }, { "epoch": 3.13, "grad_norm": 3.500369841183393, "learning_rate": 4.883638795986623e-06, "loss": 0.1239, "step": 79675 }, { "epoch": 3.14, "grad_norm": 2.9150129908770857, "learning_rate": 4.883596989966556e-06, "loss": 0.1264, "step": 79700 }, { "epoch": 3.14, "grad_norm": 3.720662911601689, "learning_rate": 4.883555183946488e-06, "loss": 0.1289, "step": 79725 }, { "epoch": 3.14, "grad_norm": 3.340766578368334, "learning_rate": 4.883513377926422e-06, "loss": 0.1416, "step": 79750 }, { "epoch": 3.14, "grad_norm": 3.3314964950611614, "learning_rate": 4.883471571906354e-06, "loss": 0.1245, "step": 79775 }, { "epoch": 3.14, "grad_norm": 4.481987339852263, "learning_rate": 4.883429765886288e-06, "loss": 0.1423, "step": 79800 }, { "epoch": 3.14, "grad_norm": 2.6168443489008637, "learning_rate": 4.8833879598662205e-06, "loss": 0.1343, "step": 79825 }, { "epoch": 3.14, "grad_norm": 3.9455102466510907, "learning_rate": 4.883346153846154e-06, "loss": 0.1227, "step": 79850 }, { "epoch": 3.14, "grad_norm": 2.6209413472131025, "learning_rate": 4.883304347826088e-06, "loss": 0.1405, "step": 79875 }, { "epoch": 3.14, "grad_norm": 4.187224029783121, "learning_rate": 4.88326254180602e-06, "loss": 0.1346, "step": 79900 }, { "epoch": 3.14, "grad_norm": 3.9706536290794507, "learning_rate": 4.883220735785954e-06, "loss": 0.1334, "step": 79925 }, { "epoch": 3.15, "grad_norm": 3.1222225652663607, "learning_rate": 4.8831789297658866e-06, "loss": 0.1165, "step": 79950 }, { "epoch": 3.15, "grad_norm": 3.1495103383983034, "learning_rate": 4.88313712374582e-06, "loss": 0.119, "step": 79975 }, { "epoch": 3.15, "grad_norm": 3.5192842049873176, "learning_rate": 4.883095317725753e-06, "loss": 0.121, "step": 80000 }, { "epoch": 3.15, "eval_loss": 0.5361328125, "eval_runtime": 11580.0151, "eval_samples_per_second": 0.818, "eval_steps_per_second": 0.051, "eval_wer": 0.11300934639576597, "step": 80000 }, { "epoch": 3.15, "grad_norm": 3.382723320522212, "learning_rate": 1.4347222222222223e-07, "loss": 0.1257, "step": 80025 }, { "epoch": 3.15, "grad_norm": 3.7499859028087914, "learning_rate": 1.417361111111111e-07, "loss": 0.1128, "step": 80050 }, { "epoch": 3.15, "grad_norm": 2.9592785016890195, "learning_rate": 1.4e-07, "loss": 0.1195, "step": 80075 }, { "epoch": 3.15, "grad_norm": 3.4893216360464137, "learning_rate": 1.3826388888888889e-07, "loss": 0.1313, "step": 80100 }, { "epoch": 3.15, "grad_norm": 3.719554257846966, "learning_rate": 1.365277777777778e-07, "loss": 0.1113, "step": 80125 }, { "epoch": 3.15, "grad_norm": 3.3150805950550217, "learning_rate": 1.347916666666667e-07, "loss": 0.124, "step": 80150 }, { "epoch": 3.15, "grad_norm": 3.810734588250568, "learning_rate": 1.3305555555555557e-07, "loss": 0.1217, "step": 80175 }, { "epoch": 3.15, "grad_norm": 2.616845456697401, "learning_rate": 1.3131944444444444e-07, "loss": 0.1277, "step": 80200 }, { "epoch": 3.16, "grad_norm": 3.2366291515709578, "learning_rate": 1.2958333333333335e-07, "loss": 0.1152, "step": 80225 }, { "epoch": 3.16, "grad_norm": 3.7270587123667935, "learning_rate": 1.2791666666666668e-07, "loss": 0.1214, "step": 80250 }, { "epoch": 3.16, "grad_norm": 3.79323438662421, "learning_rate": 1.2618055555555558e-07, "loss": 0.1164, "step": 80275 }, { "epoch": 3.16, "grad_norm": 3.388340458302277, "learning_rate": 1.2444444444444446e-07, "loss": 0.1053, "step": 80300 }, { "epoch": 3.16, "grad_norm": 3.0068550159409613, "learning_rate": 1.2270833333333333e-07, "loss": 0.1363, "step": 80325 }, { "epoch": 3.16, "grad_norm": 2.6705820472591024, "learning_rate": 1.2097222222222223e-07, "loss": 0.1184, "step": 80350 }, { "epoch": 3.16, "grad_norm": 3.6711884295544013, "learning_rate": 1.192361111111111e-07, "loss": 0.1183, "step": 80375 }, { "epoch": 3.16, "grad_norm": 4.303935705314459, "learning_rate": 1.1750000000000001e-07, "loss": 0.1312, "step": 80400 }, { "epoch": 3.16, "grad_norm": 3.0047973076097407, "learning_rate": 1.157638888888889e-07, "loss": 0.1308, "step": 80425 }, { "epoch": 3.16, "grad_norm": 3.809379593211185, "learning_rate": 1.1402777777777778e-07, "loss": 0.1167, "step": 80450 }, { "epoch": 3.16, "grad_norm": 2.9141385074256534, "learning_rate": 1.1229166666666668e-07, "loss": 0.1215, "step": 80475 }, { "epoch": 3.17, "grad_norm": 3.6712555964990172, "learning_rate": 1.1055555555555557e-07, "loss": 0.1307, "step": 80500 }, { "epoch": 3.17, "grad_norm": 2.821579337006869, "learning_rate": 1.0881944444444445e-07, "loss": 0.1114, "step": 80525 }, { "epoch": 3.17, "grad_norm": 3.7286248730869658, "learning_rate": 1.0708333333333335e-07, "loss": 0.1346, "step": 80550 }, { "epoch": 3.17, "grad_norm": 3.0656348499738315, "learning_rate": 1.0534722222222223e-07, "loss": 0.1149, "step": 80575 }, { "epoch": 3.17, "grad_norm": 4.331509026966981, "learning_rate": 1.0361111111111111e-07, "loss": 0.1107, "step": 80600 }, { "epoch": 3.17, "grad_norm": 1.8774186348904527, "learning_rate": 1.0187500000000002e-07, "loss": 0.1117, "step": 80625 }, { "epoch": 3.17, "grad_norm": 2.706357801131824, "learning_rate": 1.0013888888888889e-07, "loss": 0.1129, "step": 80650 }, { "epoch": 3.17, "grad_norm": 3.5291983010831305, "learning_rate": 9.840277777777778e-08, "loss": 0.1276, "step": 80675 }, { "epoch": 3.17, "grad_norm": 2.5884193020093105, "learning_rate": 9.666666666666669e-08, "loss": 0.1228, "step": 80700 }, { "epoch": 3.17, "grad_norm": 3.065588647509654, "learning_rate": 9.493055555555556e-08, "loss": 0.1282, "step": 80725 }, { "epoch": 3.18, "grad_norm": 3.4441302429520957, "learning_rate": 9.319444444444445e-08, "loss": 0.1109, "step": 80750 }, { "epoch": 3.18, "grad_norm": 3.568478894528398, "learning_rate": 9.145833333333335e-08, "loss": 0.115, "step": 80775 }, { "epoch": 3.18, "grad_norm": 3.2298861894081052, "learning_rate": 8.972222222222223e-08, "loss": 0.1079, "step": 80800 }, { "epoch": 3.18, "grad_norm": 3.0351733682866784, "learning_rate": 8.798611111111112e-08, "loss": 0.1155, "step": 80825 }, { "epoch": 3.18, "grad_norm": 2.8423943298154777, "learning_rate": 8.625000000000001e-08, "loss": 0.1058, "step": 80850 }, { "epoch": 3.18, "grad_norm": 4.239375206161924, "learning_rate": 8.45138888888889e-08, "loss": 0.1202, "step": 80875 }, { "epoch": 3.18, "grad_norm": 3.9414210967413044, "learning_rate": 8.277777777777779e-08, "loss": 0.1226, "step": 80900 }, { "epoch": 3.18, "grad_norm": 2.6741087247987356, "learning_rate": 8.104166666666668e-08, "loss": 0.1131, "step": 80925 }, { "epoch": 3.18, "grad_norm": 2.470938835323759, "learning_rate": 7.930555555555557e-08, "loss": 0.1006, "step": 80950 }, { "epoch": 3.18, "grad_norm": 3.1023252839310533, "learning_rate": 7.756944444444445e-08, "loss": 0.1192, "step": 80975 }, { "epoch": 3.19, "grad_norm": 3.1292889463150133, "learning_rate": 7.583333333333334e-08, "loss": 0.1111, "step": 81000 }, { "epoch": 3.19, "grad_norm": 2.8191984119816373, "learning_rate": 7.409722222222222e-08, "loss": 0.1214, "step": 81025 }, { "epoch": 3.19, "grad_norm": 2.699005654746743, "learning_rate": 7.236111111111111e-08, "loss": 0.1083, "step": 81050 }, { "epoch": 3.19, "grad_norm": 2.5511639322171162, "learning_rate": 7.062500000000001e-08, "loss": 0.1028, "step": 81075 }, { "epoch": 3.19, "grad_norm": 4.017135101608909, "learning_rate": 6.888888888888889e-08, "loss": 0.108, "step": 81100 }, { "epoch": 3.19, "grad_norm": 3.354633095244795, "learning_rate": 6.715277777777778e-08, "loss": 0.1187, "step": 81125 }, { "epoch": 3.19, "grad_norm": 2.968280201052923, "learning_rate": 6.541666666666668e-08, "loss": 0.1117, "step": 81150 }, { "epoch": 3.19, "grad_norm": 3.441489150840829, "learning_rate": 6.368055555555556e-08, "loss": 0.1168, "step": 81175 }, { "epoch": 3.19, "grad_norm": 3.397176047085528, "learning_rate": 6.194444444444445e-08, "loss": 0.1167, "step": 81200 }, { "epoch": 3.19, "grad_norm": 3.7413484965620807, "learning_rate": 6.020833333333335e-08, "loss": 0.1245, "step": 81225 }, { "epoch": 3.2, "grad_norm": 3.368881934301906, "learning_rate": 5.8541666666666666e-08, "loss": 0.1164, "step": 81250 }, { "epoch": 3.2, "grad_norm": 3.5376665390407798, "learning_rate": 5.680555555555556e-08, "loss": 0.1125, "step": 81275 }, { "epoch": 3.2, "grad_norm": 3.071272749793487, "learning_rate": 5.506944444444445e-08, "loss": 0.1043, "step": 81300 }, { "epoch": 3.2, "grad_norm": 2.8957813289832415, "learning_rate": 5.3333333333333334e-08, "loss": 0.1227, "step": 81325 }, { "epoch": 3.2, "grad_norm": 3.177849245107898, "learning_rate": 5.1597222222222224e-08, "loss": 0.1151, "step": 81350 }, { "epoch": 3.2, "grad_norm": 3.036992433502032, "learning_rate": 4.986111111111112e-08, "loss": 0.1067, "step": 81375 }, { "epoch": 3.2, "grad_norm": 3.2947567806935134, "learning_rate": 4.8125e-08, "loss": 0.1169, "step": 81400 }, { "epoch": 3.2, "grad_norm": 2.6033716960489306, "learning_rate": 4.638888888888889e-08, "loss": 0.1308, "step": 81425 }, { "epoch": 3.2, "grad_norm": 3.271050187274925, "learning_rate": 4.465277777777779e-08, "loss": 0.1216, "step": 81450 }, { "epoch": 3.2, "grad_norm": 3.228010138283361, "learning_rate": 4.291666666666667e-08, "loss": 0.1178, "step": 81475 }, { "epoch": 3.21, "grad_norm": 1.4784343520529, "learning_rate": 4.118055555555556e-08, "loss": 0.1104, "step": 81500 }, { "epoch": 3.21, "grad_norm": 3.007137030167928, "learning_rate": 3.944444444444445e-08, "loss": 0.1063, "step": 81525 }, { "epoch": 3.21, "grad_norm": 3.391635916189553, "learning_rate": 3.770833333333334e-08, "loss": 0.1057, "step": 81550 }, { "epoch": 3.21, "grad_norm": 2.9441226132043647, "learning_rate": 3.597222222222222e-08, "loss": 0.1265, "step": 81575 }, { "epoch": 3.21, "grad_norm": 2.4559004956821036, "learning_rate": 3.423611111111112e-08, "loss": 0.107, "step": 81600 }, { "epoch": 3.21, "grad_norm": 3.1518532804348625, "learning_rate": 3.25e-08, "loss": 0.1375, "step": 81625 }, { "epoch": 3.21, "grad_norm": 2.8691924917711353, "learning_rate": 3.076388888888889e-08, "loss": 0.1073, "step": 81650 }, { "epoch": 3.21, "grad_norm": 2.866450101574571, "learning_rate": 2.902777777777778e-08, "loss": 0.1081, "step": 81675 }, { "epoch": 3.21, "grad_norm": 4.055904824711259, "learning_rate": 2.7291666666666668e-08, "loss": 0.1108, "step": 81700 }, { "epoch": 3.21, "grad_norm": 3.35548877818344, "learning_rate": 2.555555555555556e-08, "loss": 0.1175, "step": 81725 }, { "epoch": 3.22, "grad_norm": 3.0025022533511128, "learning_rate": 2.3819444444444447e-08, "loss": 0.1196, "step": 81750 }, { "epoch": 3.22, "grad_norm": 3.9601682959534625, "learning_rate": 2.2083333333333336e-08, "loss": 0.121, "step": 81775 }, { "epoch": 3.22, "grad_norm": 3.5576863922110955, "learning_rate": 2.0347222222222222e-08, "loss": 0.1156, "step": 81800 }, { "epoch": 3.22, "grad_norm": 3.0524980328514615, "learning_rate": 1.861111111111111e-08, "loss": 0.107, "step": 81825 }, { "epoch": 3.22, "grad_norm": 2.5465332535964564, "learning_rate": 1.6875e-08, "loss": 0.1104, "step": 81850 }, { "epoch": 3.22, "grad_norm": 3.9758255525233475, "learning_rate": 1.513888888888889e-08, "loss": 0.1192, "step": 81875 }, { "epoch": 3.22, "grad_norm": 3.156736393762649, "learning_rate": 1.3402777777777778e-08, "loss": 0.1246, "step": 81900 }, { "epoch": 3.22, "grad_norm": 2.193575947789099, "learning_rate": 1.1666666666666669e-08, "loss": 0.1183, "step": 81925 }, { "epoch": 3.22, "grad_norm": 4.202801227471746, "learning_rate": 9.930555555555556e-09, "loss": 0.1035, "step": 81950 }, { "epoch": 3.22, "grad_norm": 4.617004459055916, "learning_rate": 8.194444444444446e-09, "loss": 0.1142, "step": 81975 }, { "epoch": 3.22, "grad_norm": 3.2809986802623974, "learning_rate": 6.458333333333334e-09, "loss": 0.1231, "step": 82000 }, { "epoch": 3.22, "step": 82000, "total_flos": 7.219553262695875e+18, "train_loss": 0.0028538422119326707, "train_runtime": 23952.0519, "train_samples_per_second": 109.552, "train_steps_per_second": 3.424 } ], "logging_steps": 25, "max_steps": 82000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 5000, "total_flos": 7.219553262695875e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }