diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4881 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.44236188478930466, + "eval_steps": 500, + "global_step": 40500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.00019996132455658605, + "loss": 42.0911, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999226491131721, + "loss": 38.7852, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019988397366975814, + "loss": 31.5332, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019984529822634418, + "loss": 25.7456, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001998066227829302, + "loss": 20.5779, + "step": 250 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019976794733951624, + "loss": 17.4712, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019972927189610228, + "loss": 14.2287, + "step": 350 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019969059645268835, + "loss": 12.748, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001996519210092744, + "loss": 11.9603, + "step": 450 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019961324556586044, + "loss": 12.6114, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019957457012244646, + "loss": 11.2424, + "step": 550 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001995358946790325, + "loss": 10.4185, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019949721923561854, + "loss": 10.6495, + "step": 650 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019945854379220458, + "loss": 8.6583, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019941986834879063, + "loss": 7.9045, + "step": 750 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019938119290537667, + "loss": 7.5867, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019934251746196271, + "loss": 6.344, + "step": 850 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019930384201854876, + "loss": 7.0004, + "step": 900 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001992651665751348, + "loss": 6.0177, + "step": 950 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019922649113172082, + "loss": 6.4546, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019918781568830686, + "loss": 4.9639, + "step": 1050 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001991491402448929, + "loss": 4.5082, + "step": 1100 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019911046480147897, + "loss": 4.9607, + "step": 1150 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019907178935806501, + "loss": 4.6557, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019903311391465106, + "loss": 3.7942, + "step": 1250 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001989944384712371, + "loss": 3.3466, + "step": 1300 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019895576302782312, + "loss": 4.2555, + "step": 1350 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019891708758440916, + "loss": 3.7983, + "step": 1400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001988784121409952, + "loss": 3.6397, + "step": 1450 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019883973669758125, + "loss": 3.2206, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001988010612541673, + "loss": 2.9513, + "step": 1550 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019876238581075333, + "loss": 3.443, + "step": 1600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019872371036733938, + "loss": 2.6171, + "step": 1650 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019868503492392542, + "loss": 2.6626, + "step": 1700 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019864635948051143, + "loss": 3.2079, + "step": 1750 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019860768403709748, + "loss": 2.679, + "step": 1800 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019856900859368352, + "loss": 3.2509, + "step": 1850 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001985303331502696, + "loss": 2.3529, + "step": 1900 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019849165770685563, + "loss": 2.3721, + "step": 1950 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019845298226344168, + "loss": 2.7719, + "step": 2000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019841430682002772, + "loss": 2.3059, + "step": 2050 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019837563137661374, + "loss": 2.9214, + "step": 2100 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019833695593319978, + "loss": 2.4541, + "step": 2150 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019829828048978582, + "loss": 2.3267, + "step": 2200 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019825960504637186, + "loss": 2.1945, + "step": 2250 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001982209296029579, + "loss": 2.3966, + "step": 2300 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019818225415954395, + "loss": 2.5349, + "step": 2350 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019814357871613, + "loss": 2.0588, + "step": 2400 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019810490327271604, + "loss": 3.1209, + "step": 2450 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019806622782930205, + "loss": 2.3281, + "step": 2500 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001980275523858881, + "loss": 2.0749, + "step": 2550 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019798887694247414, + "loss": 2.1665, + "step": 2600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001979502014990602, + "loss": 2.5256, + "step": 2650 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019791152605564625, + "loss": 2.3435, + "step": 2700 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001978728506122323, + "loss": 2.2333, + "step": 2750 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019783417516881834, + "loss": 1.9695, + "step": 2800 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019779549972540435, + "loss": 2.3046, + "step": 2850 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001977568242819904, + "loss": 2.1951, + "step": 2900 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019771814883857644, + "loss": 2.2141, + "step": 2950 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019767947339516248, + "loss": 2.3285, + "step": 3000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019764079795174853, + "loss": 1.9263, + "step": 3050 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019760212250833457, + "loss": 2.4391, + "step": 3100 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001975634470649206, + "loss": 2.2386, + "step": 3150 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019752477162150665, + "loss": 1.9979, + "step": 3200 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019748609617809267, + "loss": 2.2926, + "step": 3250 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001974474207346787, + "loss": 2.0263, + "step": 3300 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019740874529126476, + "loss": 2.3533, + "step": 3350 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019737006984785083, + "loss": 2.0248, + "step": 3400 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019733139440443687, + "loss": 1.5322, + "step": 3450 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001972927189610229, + "loss": 1.2563, + "step": 3500 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019725404351760896, + "loss": 1.2361, + "step": 3550 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019721536807419497, + "loss": 1.3821, + "step": 3600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019717669263078101, + "loss": 1.0988, + "step": 3650 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019713801718736706, + "loss": 1.2244, + "step": 3700 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001970993417439531, + "loss": 0.9095, + "step": 3750 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019706066630053914, + "loss": 1.2458, + "step": 3800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001970219908571252, + "loss": 1.1168, + "step": 3850 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019698331541371123, + "loss": 0.7974, + "step": 3900 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019694463997029727, + "loss": 1.0594, + "step": 3950 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001969059645268833, + "loss": 1.2522, + "step": 4000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019686728908346933, + "loss": 0.8916, + "step": 4050 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001968286136400554, + "loss": 0.9284, + "step": 4100 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019678993819664144, + "loss": 0.7177, + "step": 4150 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001967512627532275, + "loss": 1.0662, + "step": 4200 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019671258730981353, + "loss": 1.0509, + "step": 4250 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019667391186639957, + "loss": 1.0486, + "step": 4300 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001966352364229856, + "loss": 0.9541, + "step": 4350 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019659656097957163, + "loss": 1.1056, + "step": 4400 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019655788553615768, + "loss": 1.0613, + "step": 4450 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019651921009274372, + "loss": 0.9647, + "step": 4500 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019648053464932976, + "loss": 0.8281, + "step": 4550 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001964418592059158, + "loss": 0.8205, + "step": 4600 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019640318376250185, + "loss": 0.8107, + "step": 4650 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001963645083190879, + "loss": 0.9087, + "step": 4700 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019632583287567393, + "loss": 0.7476, + "step": 4750 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019628715743225995, + "loss": 0.8191, + "step": 4800 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019624848198884602, + "loss": 1.0138, + "step": 4850 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019620980654543206, + "loss": 1.0121, + "step": 4900 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001961711311020181, + "loss": 0.7376, + "step": 4950 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019613245565860415, + "loss": 0.8335, + "step": 5000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001960937802151902, + "loss": 0.9411, + "step": 5050 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001960551047717762, + "loss": 0.8631, + "step": 5100 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019601642932836225, + "loss": 0.936, + "step": 5150 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001959777538849483, + "loss": 0.8524, + "step": 5200 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019593907844153434, + "loss": 0.7093, + "step": 5250 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019590040299812038, + "loss": 0.8302, + "step": 5300 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019586172755470642, + "loss": 0.8756, + "step": 5350 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019582305211129247, + "loss": 0.7728, + "step": 5400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001957843766678785, + "loss": 0.809, + "step": 5450 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019574570122446455, + "loss": 0.7282, + "step": 5500 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019570702578105057, + "loss": 0.8608, + "step": 5550 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019566835033763664, + "loss": 1.0078, + "step": 5600 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019562967489422268, + "loss": 0.9773, + "step": 5650 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019559099945080872, + "loss": 0.7969, + "step": 5700 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019555232400739477, + "loss": 0.9988, + "step": 5750 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001955136485639808, + "loss": 0.7552, + "step": 5800 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019547497312056685, + "loss": 0.8775, + "step": 5850 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019543629767715287, + "loss": 0.8895, + "step": 5900 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001953976222337389, + "loss": 0.8106, + "step": 5950 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019535894679032496, + "loss": 0.6546, + "step": 6000 + }, + { + "epoch": 0.05, + "learning_rate": 0.000195320271346911, + "loss": 0.9039, + "step": 6050 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019528159590349704, + "loss": 0.652, + "step": 6100 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019524292046008308, + "loss": 0.6561, + "step": 6150 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019520424501666913, + "loss": 1.046, + "step": 6200 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019516556957325517, + "loss": 0.8783, + "step": 6250 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019512689412984119, + "loss": 0.7351, + "step": 6300 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019508821868642726, + "loss": 0.733, + "step": 6350 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001950495432430133, + "loss": 0.7675, + "step": 6400 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019501086779959934, + "loss": 0.9451, + "step": 6450 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019497219235618539, + "loss": 0.9686, + "step": 6500 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019493351691277143, + "loss": 0.6083, + "step": 6550 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019489484146935747, + "loss": 0.8619, + "step": 6600 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001948561660259435, + "loss": 0.6557, + "step": 6650 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019481749058252953, + "loss": 0.8819, + "step": 6700 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019477881513911557, + "loss": 0.8356, + "step": 6750 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019474013969570162, + "loss": 0.8211, + "step": 6800 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019470146425228766, + "loss": 0.8393, + "step": 6850 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001946627888088737, + "loss": 1.0301, + "step": 6900 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019462411336545975, + "loss": 0.7435, + "step": 6950 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001945854379220458, + "loss": 0.71, + "step": 7000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001945467624786318, + "loss": 0.7786, + "step": 7050 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019450808703521787, + "loss": 1.1273, + "step": 7100 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019446941159180392, + "loss": 0.923, + "step": 7150 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019443073614838996, + "loss": 0.8656, + "step": 7200 + }, + { + "epoch": 0.06, + "learning_rate": 0.000194392060704976, + "loss": 0.8191, + "step": 7250 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019435338526156205, + "loss": 0.8924, + "step": 7300 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001943147098181481, + "loss": 0.9004, + "step": 7350 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001942760343747341, + "loss": 0.6538, + "step": 7400 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019423735893132015, + "loss": 0.8669, + "step": 7450 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001941986834879062, + "loss": 0.9103, + "step": 7500 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019416000804449223, + "loss": 0.8853, + "step": 7550 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019412133260107828, + "loss": 0.7989, + "step": 7600 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019408265715766432, + "loss": 0.6957, + "step": 7650 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019404398171425036, + "loss": 0.8685, + "step": 7700 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001940053062708364, + "loss": 0.6701, + "step": 7750 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019396663082742242, + "loss": 0.7488, + "step": 7800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001939279553840085, + "loss": 0.9214, + "step": 7850 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019388927994059454, + "loss": 0.7879, + "step": 7900 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019385060449718058, + "loss": 0.8522, + "step": 7950 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019381192905376662, + "loss": 0.9119, + "step": 8000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019377325361035267, + "loss": 0.6229, + "step": 8050 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001937345781669387, + "loss": 0.8, + "step": 8100 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019369590272352472, + "loss": 0.6705, + "step": 8150 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019365722728011077, + "loss": 0.8694, + "step": 8200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001936185518366968, + "loss": 0.7932, + "step": 8250 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019357987639328285, + "loss": 0.7311, + "step": 8300 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001935412009498689, + "loss": 0.844, + "step": 8350 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019350252550645494, + "loss": 0.8428, + "step": 8400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019346385006304098, + "loss": 0.8791, + "step": 8450 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019342517461962703, + "loss": 0.9576, + "step": 8500 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019338649917621304, + "loss": 0.821, + "step": 8550 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001933478237327991, + "loss": 1.0343, + "step": 8600 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019330914828938515, + "loss": 0.862, + "step": 8650 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001932704728459712, + "loss": 0.6914, + "step": 8700 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019323179740255724, + "loss": 1.0047, + "step": 8750 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019319312195914328, + "loss": 0.7347, + "step": 8800 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019315444651572933, + "loss": 0.7331, + "step": 8850 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019311577107231534, + "loss": 0.9639, + "step": 8900 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019307709562890139, + "loss": 0.7824, + "step": 8950 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019303842018548743, + "loss": 0.8321, + "step": 9000 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019299974474207347, + "loss": 1.053, + "step": 9050 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019296106929865951, + "loss": 0.677, + "step": 9100 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019292239385524556, + "loss": 0.8771, + "step": 9150 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001928837184118316, + "loss": 0.7547, + "step": 9200 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019284504296841764, + "loss": 0.7911, + "step": 9250 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019280636752500366, + "loss": 0.8772, + "step": 9300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019276769208158973, + "loss": 1.0254, + "step": 9350 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019272901663817577, + "loss": 0.9881, + "step": 9400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019269034119476182, + "loss": 0.9809, + "step": 9450 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019265166575134786, + "loss": 0.6407, + "step": 9500 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001926129903079339, + "loss": 0.8552, + "step": 9550 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019257431486451994, + "loss": 0.5715, + "step": 9600 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019253563942110596, + "loss": 0.7908, + "step": 9650 + }, + { + "epoch": 0.08, + "learning_rate": 0.000192496963977692, + "loss": 0.8544, + "step": 9700 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019245828853427805, + "loss": 0.7795, + "step": 9750 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001924196130908641, + "loss": 0.7534, + "step": 9800 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019238093764745013, + "loss": 0.9141, + "step": 9850 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019234226220403618, + "loss": 0.6377, + "step": 9900 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019230358676062222, + "loss": 0.8392, + "step": 9950 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019226491131720826, + "loss": 0.8541, + "step": 10000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001922262358737943, + "loss": 0.7969, + "step": 10050 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019218756043038035, + "loss": 0.6434, + "step": 10100 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001921488849869664, + "loss": 0.9645, + "step": 10150 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019211020954355243, + "loss": 0.8545, + "step": 10200 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019207153410013848, + "loss": 0.669, + "step": 10250 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019203285865672452, + "loss": 0.7878, + "step": 10300 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019199418321331056, + "loss": 0.6872, + "step": 10350 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019195550776989658, + "loss": 0.7578, + "step": 10400 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019191683232648262, + "loss": 0.6626, + "step": 10450 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019187815688306866, + "loss": 0.7433, + "step": 10500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001918394814396547, + "loss": 0.8421, + "step": 10550 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019180080599624075, + "loss": 0.8302, + "step": 10600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001917621305528268, + "loss": 0.7689, + "step": 10650 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019172345510941284, + "loss": 0.7695, + "step": 10700 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019168477966599888, + "loss": 0.8601, + "step": 10750 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019164610422258492, + "loss": 1.0576, + "step": 10800 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019160742877917097, + "loss": 0.6168, + "step": 10850 + }, + { + "epoch": 0.08, + "learning_rate": 0.000191568753335757, + "loss": 0.8053, + "step": 10900 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019153007789234305, + "loss": 0.6034, + "step": 10950 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001914914024489291, + "loss": 0.8146, + "step": 11000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019145272700551514, + "loss": 0.813, + "step": 11050 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019141405156210118, + "loss": 0.7254, + "step": 11100 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019137537611868722, + "loss": 0.8516, + "step": 11150 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019133670067527324, + "loss": 0.8619, + "step": 11200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019129802523185928, + "loss": 0.8323, + "step": 11250 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019125934978844533, + "loss": 0.802, + "step": 11300 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019122067434503137, + "loss": 0.9431, + "step": 11350 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001911819989016174, + "loss": 0.833, + "step": 11400 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019114332345820345, + "loss": 0.8785, + "step": 11450 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001911046480147895, + "loss": 0.894, + "step": 11500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019106597257137554, + "loss": 0.9139, + "step": 11550 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019102729712796158, + "loss": 0.7368, + "step": 11600 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019098862168454763, + "loss": 0.8103, + "step": 11650 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019094994624113367, + "loss": 0.8547, + "step": 11700 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001909112707977197, + "loss": 0.7595, + "step": 11750 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019087259535430576, + "loss": 0.7432, + "step": 11800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001908339199108918, + "loss": 0.8002, + "step": 11850 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019079524446747784, + "loss": 0.7238, + "step": 11900 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019075656902406386, + "loss": 0.7368, + "step": 11950 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001907178935806499, + "loss": 0.7619, + "step": 12000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019067921813723594, + "loss": 0.8555, + "step": 12050 + }, + { + "epoch": 0.09, + "learning_rate": 0.000190640542693822, + "loss": 0.7968, + "step": 12100 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019060186725040803, + "loss": 0.6167, + "step": 12150 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019056319180699407, + "loss": 0.7268, + "step": 12200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019052451636358012, + "loss": 0.7624, + "step": 12250 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019048584092016616, + "loss": 0.7498, + "step": 12300 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001904471654767522, + "loss": 0.7913, + "step": 12350 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019040849003333825, + "loss": 0.9818, + "step": 12400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001903698145899243, + "loss": 0.9653, + "step": 12450 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019033113914651033, + "loss": 0.8576, + "step": 12500 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019029246370309637, + "loss": 0.7793, + "step": 12550 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019025378825968242, + "loss": 0.9986, + "step": 12600 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019021511281626846, + "loss": 0.6204, + "step": 12650 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019017643737285448, + "loss": 0.7432, + "step": 12700 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019013776192944052, + "loss": 0.7728, + "step": 12750 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019009908648602656, + "loss": 0.924, + "step": 12800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001900604110426126, + "loss": 0.8306, + "step": 12850 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019002173559919865, + "loss": 0.9331, + "step": 12900 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001899830601557847, + "loss": 0.9156, + "step": 12950 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018994438471237073, + "loss": 0.7475, + "step": 13000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018990570926895678, + "loss": 0.8014, + "step": 13050 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018986703382554282, + "loss": 0.7636, + "step": 13100 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018982835838212886, + "loss": 0.8878, + "step": 13150 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001897896829387149, + "loss": 0.7146, + "step": 13200 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018975100749530095, + "loss": 0.7577, + "step": 13250 + }, + { + "epoch": 0.1, + "learning_rate": 0.000189712332051887, + "loss": 0.9388, + "step": 13300 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018967365660847304, + "loss": 0.7735, + "step": 13350 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018963498116505908, + "loss": 0.6801, + "step": 13400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001895963057216451, + "loss": 0.7908, + "step": 13450 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018955763027823114, + "loss": 0.7054, + "step": 13500 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018951895483481718, + "loss": 0.8082, + "step": 13550 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018948027939140322, + "loss": 0.7959, + "step": 13600 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018944160394798927, + "loss": 0.8319, + "step": 13650 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001894029285045753, + "loss": 0.7559, + "step": 13700 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018936425306116135, + "loss": 0.6439, + "step": 13750 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001893255776177474, + "loss": 0.7906, + "step": 13800 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018928690217433344, + "loss": 0.9517, + "step": 13850 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018924822673091948, + "loss": 0.8082, + "step": 13900 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018920955128750552, + "loss": 0.8872, + "step": 13950 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018917087584409157, + "loss": 0.6533, + "step": 14000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001891322004006776, + "loss": 0.8846, + "step": 14050 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018909352495726365, + "loss": 0.7644, + "step": 14100 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001890548495138497, + "loss": 0.9197, + "step": 14150 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001890161740704357, + "loss": 0.8356, + "step": 14200 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018897749862702176, + "loss": 0.7626, + "step": 14250 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001889388231836078, + "loss": 0.7978, + "step": 14300 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018890014774019384, + "loss": 0.9382, + "step": 14350 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018886147229677988, + "loss": 0.8213, + "step": 14400 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018882279685336593, + "loss": 0.8098, + "step": 14450 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018878412140995197, + "loss": 0.6624, + "step": 14500 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018874544596653801, + "loss": 0.6901, + "step": 14550 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018870677052312406, + "loss": 0.8449, + "step": 14600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001886680950797101, + "loss": 0.992, + "step": 14650 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018862941963629614, + "loss": 0.8163, + "step": 14700 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018859074419288219, + "loss": 0.9079, + "step": 14750 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018855206874946823, + "loss": 0.6967, + "step": 14800 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018851339330605427, + "loss": 0.7634, + "step": 14850 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018847471786264032, + "loss": 0.881, + "step": 14900 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018843604241922633, + "loss": 0.9108, + "step": 14950 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018839736697581237, + "loss": 0.7132, + "step": 15000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018835869153239842, + "loss": 0.5067, + "step": 15050 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018832001608898446, + "loss": 1.1357, + "step": 15100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001882813406455705, + "loss": 0.7256, + "step": 15150 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018824266520215655, + "loss": 0.6846, + "step": 15200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001882039897587426, + "loss": 0.8358, + "step": 15250 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018816531431532863, + "loss": 0.7776, + "step": 15300 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018812663887191468, + "loss": 0.5573, + "step": 15350 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018808796342850072, + "loss": 0.6548, + "step": 15400 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018804928798508676, + "loss": 0.7813, + "step": 15450 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001880106125416728, + "loss": 0.853, + "step": 15500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018797193709825885, + "loss": 0.757, + "step": 15550 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001879332616548449, + "loss": 0.7511, + "step": 15600 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018789458621143093, + "loss": 0.8809, + "step": 15650 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018785591076801698, + "loss": 0.6439, + "step": 15700 + }, + { + "epoch": 0.12, + "learning_rate": 0.000187817235324603, + "loss": 0.6401, + "step": 15750 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018777855988118904, + "loss": 0.9463, + "step": 15800 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018773988443777508, + "loss": 0.7206, + "step": 15850 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018770120899436112, + "loss": 0.738, + "step": 15900 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018766253355094716, + "loss": 0.8078, + "step": 15950 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001876238581075332, + "loss": 0.8814, + "step": 16000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018758518266411925, + "loss": 0.7841, + "step": 16050 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001875465072207053, + "loss": 0.9534, + "step": 16100 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018750783177729134, + "loss": 0.7588, + "step": 16150 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018746915633387738, + "loss": 0.7467, + "step": 16200 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018743048089046342, + "loss": 0.7402, + "step": 16250 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018739180544704947, + "loss": 0.7391, + "step": 16300 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001873531300036355, + "loss": 0.93, + "step": 16350 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018731445456022155, + "loss": 0.673, + "step": 16400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001872757791168076, + "loss": 0.8719, + "step": 16450 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001872371036733936, + "loss": 0.7977, + "step": 16500 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018719842822997965, + "loss": 0.8446, + "step": 16550 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001871597527865657, + "loss": 0.5509, + "step": 16600 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018712107734315174, + "loss": 0.7187, + "step": 16650 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018708240189973778, + "loss": 0.7886, + "step": 16700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018704372645632383, + "loss": 0.787, + "step": 16750 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018700505101290987, + "loss": 0.8182, + "step": 16800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001869663755694959, + "loss": 0.7996, + "step": 16850 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018692770012608195, + "loss": 1.0537, + "step": 16900 + }, + { + "epoch": 0.13, + "learning_rate": 0.000186889024682668, + "loss": 0.7795, + "step": 16950 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018685034923925404, + "loss": 0.6382, + "step": 17000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018681167379584008, + "loss": 0.8503, + "step": 17050 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018677299835242613, + "loss": 0.7172, + "step": 17100 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018673432290901217, + "loss": 0.8269, + "step": 17150 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001866956474655982, + "loss": 0.8608, + "step": 17200 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018665697202218423, + "loss": 0.5488, + "step": 17250 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018661829657877027, + "loss": 0.6198, + "step": 17300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018657962113535631, + "loss": 0.6294, + "step": 17350 + }, + { + "epoch": 0.13, + "learning_rate": 0.00018654094569194236, + "loss": 0.6832, + "step": 17400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001865022702485284, + "loss": 0.7857, + "step": 17450 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018646359480511444, + "loss": 0.873, + "step": 17500 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018642491936170051, + "loss": 0.8379, + "step": 17550 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018638624391828653, + "loss": 0.716, + "step": 17600 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018634756847487257, + "loss": 0.7123, + "step": 17650 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018630889303145862, + "loss": 0.7582, + "step": 17700 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018627021758804466, + "loss": 0.8126, + "step": 17750 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001862315421446307, + "loss": 0.8564, + "step": 17800 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018619286670121674, + "loss": 0.5869, + "step": 17850 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001861541912578028, + "loss": 0.7508, + "step": 17900 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018611551581438883, + "loss": 0.7061, + "step": 17950 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018607684037097485, + "loss": 0.7345, + "step": 18000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001860381649275609, + "loss": 0.5775, + "step": 18050 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018599948948414693, + "loss": 0.7817, + "step": 18100 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018596081404073298, + "loss": 0.7201, + "step": 18150 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018592213859731902, + "loss": 0.8352, + "step": 18200 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018588346315390506, + "loss": 0.7986, + "step": 18250 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018584478771049113, + "loss": 0.5892, + "step": 18300 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018580611226707715, + "loss": 0.6573, + "step": 18350 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001857674368236632, + "loss": 0.7291, + "step": 18400 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018572876138024923, + "loss": 0.8477, + "step": 18450 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018569008593683528, + "loss": 0.7634, + "step": 18500 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018565141049342132, + "loss": 0.5596, + "step": 18550 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018561273505000736, + "loss": 0.7536, + "step": 18600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001855740596065934, + "loss": 0.8015, + "step": 18650 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018553538416317945, + "loss": 0.9044, + "step": 18700 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018549670871976547, + "loss": 0.7212, + "step": 18750 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001854580332763515, + "loss": 0.6835, + "step": 18800 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018541935783293755, + "loss": 0.6431, + "step": 18850 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001853806823895236, + "loss": 0.6776, + "step": 18900 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018534200694610964, + "loss": 0.8134, + "step": 18950 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018530333150269568, + "loss": 0.7613, + "step": 19000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018526465605928175, + "loss": 0.6909, + "step": 19050 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018522598061586777, + "loss": 0.5647, + "step": 19100 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001851873051724538, + "loss": 0.845, + "step": 19150 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018514862972903985, + "loss": 0.6676, + "step": 19200 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001851099542856259, + "loss": 0.608, + "step": 19250 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018507127884221194, + "loss": 0.6545, + "step": 19300 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018503260339879798, + "loss": 0.8084, + "step": 19350 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018499392795538402, + "loss": 0.9323, + "step": 19400 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018495525251197007, + "loss": 0.7761, + "step": 19450 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018491657706855608, + "loss": 0.7525, + "step": 19500 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018487790162514213, + "loss": 0.7387, + "step": 19550 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018483922618172817, + "loss": 0.7412, + "step": 19600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001848005507383142, + "loss": 0.6455, + "step": 19650 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018476187529490026, + "loss": 0.6401, + "step": 19700 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018472319985148633, + "loss": 0.7524, + "step": 19750 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018468452440807237, + "loss": 0.8381, + "step": 19800 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018464584896465838, + "loss": 0.7317, + "step": 19850 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018460717352124443, + "loss": 0.7321, + "step": 19900 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018456849807783047, + "loss": 0.8627, + "step": 19950 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001845298226344165, + "loss": 0.8806, + "step": 20000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018449114719100256, + "loss": 0.6949, + "step": 20050 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001844524717475886, + "loss": 0.8466, + "step": 20100 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018441379630417464, + "loss": 0.468, + "step": 20150 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018437512086076069, + "loss": 0.8107, + "step": 20200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001843364454173467, + "loss": 0.8214, + "step": 20250 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018429776997393274, + "loss": 0.6371, + "step": 20300 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001842590945305188, + "loss": 0.8139, + "step": 20350 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018422041908710483, + "loss": 0.8821, + "step": 20400 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018418174364369087, + "loss": 0.749, + "step": 20450 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018414306820027694, + "loss": 0.7666, + "step": 20500 + }, + { + "epoch": 0.16, + "learning_rate": 0.000184104392756863, + "loss": 0.804, + "step": 20550 + }, + { + "epoch": 0.16, + "learning_rate": 0.000184065717313449, + "loss": 0.8258, + "step": 20600 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018402704187003505, + "loss": 0.6565, + "step": 20650 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001839883664266211, + "loss": 0.6481, + "step": 20700 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018394969098320713, + "loss": 0.5938, + "step": 20750 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018391101553979317, + "loss": 0.6611, + "step": 20800 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018387234009637922, + "loss": 0.9062, + "step": 20850 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018383366465296526, + "loss": 0.6141, + "step": 20900 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001837949892095513, + "loss": 0.6457, + "step": 20950 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018375631376613735, + "loss": 0.5349, + "step": 21000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018371763832272336, + "loss": 0.6687, + "step": 21050 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001836789628793094, + "loss": 1.0448, + "step": 21100 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018364028743589545, + "loss": 0.8059, + "step": 21150 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001836016119924815, + "loss": 0.6748, + "step": 21200 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018356293654906756, + "loss": 0.5979, + "step": 21250 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001835242611056536, + "loss": 0.8469, + "step": 21300 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018348558566223962, + "loss": 0.7463, + "step": 21350 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018344691021882566, + "loss": 0.7493, + "step": 21400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001834082347754117, + "loss": 0.8654, + "step": 21450 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018336955933199775, + "loss": 0.7216, + "step": 21500 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001833308838885838, + "loss": 0.7847, + "step": 21550 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018329220844516984, + "loss": 0.5339, + "step": 21600 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018325353300175588, + "loss": 0.7045, + "step": 21650 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018321485755834192, + "loss": 0.6995, + "step": 21700 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018317618211492797, + "loss": 0.736, + "step": 21750 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018313750667151398, + "loss": 0.7212, + "step": 21800 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018309883122810002, + "loss": 0.6062, + "step": 21850 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018306015578468607, + "loss": 0.889, + "step": 21900 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001830214803412721, + "loss": 0.6812, + "step": 21950 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018298280489785818, + "loss": 0.7713, + "step": 22000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018294412945444422, + "loss": 0.7462, + "step": 22050 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018290545401103027, + "loss": 0.5084, + "step": 22100 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018286677856761628, + "loss": 0.6875, + "step": 22150 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018282810312420233, + "loss": 0.8552, + "step": 22200 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018278942768078837, + "loss": 0.7549, + "step": 22250 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001827507522373744, + "loss": 0.6307, + "step": 22300 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018271207679396045, + "loss": 1.0293, + "step": 22350 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001826734013505465, + "loss": 0.7603, + "step": 22400 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018263472590713254, + "loss": 0.5218, + "step": 22450 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018259605046371858, + "loss": 0.5962, + "step": 22500 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001825573750203046, + "loss": 0.7793, + "step": 22550 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018251869957689064, + "loss": 0.6511, + "step": 22600 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018248002413347669, + "loss": 0.6589, + "step": 22650 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018244134869006273, + "loss": 0.826, + "step": 22700 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001824026732466488, + "loss": 0.7561, + "step": 22750 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018236399780323484, + "loss": 0.7605, + "step": 22800 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018232532235982088, + "loss": 0.7887, + "step": 22850 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001822866469164069, + "loss": 0.6065, + "step": 22900 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018224797147299294, + "loss": 0.7631, + "step": 22950 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018220929602957899, + "loss": 0.6708, + "step": 23000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018217062058616503, + "loss": 0.8115, + "step": 23050 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018213194514275107, + "loss": 0.5469, + "step": 23100 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018209326969933712, + "loss": 0.751, + "step": 23150 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018205459425592316, + "loss": 0.6424, + "step": 23200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001820159188125092, + "loss": 0.7017, + "step": 23250 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018197724336909522, + "loss": 0.615, + "step": 23300 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018193856792568126, + "loss": 0.9175, + "step": 23350 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001818998924822673, + "loss": 0.9438, + "step": 23400 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018186121703885335, + "loss": 0.8183, + "step": 23450 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018182254159543942, + "loss": 0.9829, + "step": 23500 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018178386615202546, + "loss": 0.7703, + "step": 23550 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001817451907086115, + "loss": 0.6007, + "step": 23600 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018170651526519752, + "loss": 0.6095, + "step": 23650 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018166783982178356, + "loss": 0.6811, + "step": 23700 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001816291643783696, + "loss": 0.6791, + "step": 23750 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018159048893495565, + "loss": 0.8032, + "step": 23800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001815518134915417, + "loss": 0.6968, + "step": 23850 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018151313804812773, + "loss": 0.7912, + "step": 23900 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018147446260471378, + "loss": 0.6557, + "step": 23950 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018143578716129982, + "loss": 0.7041, + "step": 24000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018139711171788584, + "loss": 0.7028, + "step": 24050 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018135843627447188, + "loss": 0.5454, + "step": 24100 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018131976083105792, + "loss": 0.8485, + "step": 24150 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018128108538764396, + "loss": 0.6944, + "step": 24200 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018124240994423003, + "loss": 0.6429, + "step": 24250 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018120373450081608, + "loss": 0.9911, + "step": 24300 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018116505905740212, + "loss": 0.7624, + "step": 24350 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018112638361398814, + "loss": 0.8377, + "step": 24400 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018108770817057418, + "loss": 0.7377, + "step": 24450 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018104903272716022, + "loss": 0.8191, + "step": 24500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018101035728374627, + "loss": 0.6292, + "step": 24550 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001809716818403323, + "loss": 0.7387, + "step": 24600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018093300639691835, + "loss": 0.7166, + "step": 24650 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001808943309535044, + "loss": 0.787, + "step": 24700 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018085565551009044, + "loss": 0.6451, + "step": 24750 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018081698006667645, + "loss": 0.8142, + "step": 24800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001807783046232625, + "loss": 0.9096, + "step": 24850 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018073962917984854, + "loss": 0.6828, + "step": 24900 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018070095373643458, + "loss": 0.5445, + "step": 24950 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018066227829302065, + "loss": 0.8181, + "step": 25000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018047301300240095, + "loss": 1.124, + "step": 25050 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018043403698044964, + "loss": 1.1854, + "step": 25100 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018039506095849834, + "loss": 1.0665, + "step": 25150 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018035608493654706, + "loss": 0.8882, + "step": 25200 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018031710891459575, + "loss": 1.0253, + "step": 25250 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018027813289264445, + "loss": 1.0734, + "step": 25300 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018023915687069315, + "loss": 1.0606, + "step": 25350 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018020018084874187, + "loss": 0.9589, + "step": 25400 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018016120482679056, + "loss": 1.0562, + "step": 25450 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018012222880483926, + "loss": 0.8676, + "step": 25500 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018008325278288798, + "loss": 1.0997, + "step": 25550 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018004427676093668, + "loss": 0.9763, + "step": 25600 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018000530073898537, + "loss": 0.8347, + "step": 25650 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001799663247170341, + "loss": 0.9396, + "step": 25700 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001799273486950828, + "loss": 0.9281, + "step": 25750 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001798883726731315, + "loss": 0.9826, + "step": 25800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001798493966511802, + "loss": 0.8583, + "step": 25850 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001798104206292289, + "loss": 0.8509, + "step": 25900 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017977144460727763, + "loss": 0.8912, + "step": 25950 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001797324685853263, + "loss": 0.8786, + "step": 26000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017969349256337502, + "loss": 0.8482, + "step": 26050 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017965451654142374, + "loss": 0.9426, + "step": 26100 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017961554051947244, + "loss": 0.9505, + "step": 26150 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017957656449752113, + "loss": 0.8555, + "step": 26200 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017953758847556983, + "loss": 1.1169, + "step": 26250 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017949861245361855, + "loss": 0.8806, + "step": 26300 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017945963643166725, + "loss": 0.9295, + "step": 26350 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017942066040971594, + "loss": 0.8931, + "step": 26400 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017938168438776466, + "loss": 0.9139, + "step": 26450 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017934270836581336, + "loss": 0.9318, + "step": 26500 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017930373234386205, + "loss": 1.0256, + "step": 26550 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017926475632191078, + "loss": 0.9042, + "step": 26600 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017922578029995947, + "loss": 0.8945, + "step": 26650 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017918680427800817, + "loss": 0.8622, + "step": 26700 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001791478282560569, + "loss": 0.8348, + "step": 26750 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017910885223410559, + "loss": 1.0544, + "step": 26800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001790698762121543, + "loss": 0.7097, + "step": 26850 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017903090019020298, + "loss": 0.8808, + "step": 26900 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001789919241682517, + "loss": 0.896, + "step": 26950 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017895294814630042, + "loss": 1.0487, + "step": 27000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017891397212434912, + "loss": 0.9996, + "step": 27050 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001788749961023978, + "loss": 0.9624, + "step": 27100 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001788360200804465, + "loss": 0.9344, + "step": 27150 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017879704405849523, + "loss": 0.9103, + "step": 27200 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017875806803654393, + "loss": 0.7311, + "step": 27250 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017871909201459262, + "loss": 0.9748, + "step": 27300 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017868011599264134, + "loss": 0.7231, + "step": 27350 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017864113997069004, + "loss": 0.9844, + "step": 27400 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017860216394873874, + "loss": 0.9322, + "step": 27450 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017856318792678746, + "loss": 0.9103, + "step": 27500 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017852421190483615, + "loss": 1.0132, + "step": 27550 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017848523588288485, + "loss": 0.8617, + "step": 27600 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017844625986093354, + "loss": 1.0296, + "step": 27650 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017840728383898227, + "loss": 1.0048, + "step": 27700 + }, + { + "epoch": 0.22, + "learning_rate": 0.000178368307817031, + "loss": 1.1557, + "step": 27750 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017832933179507966, + "loss": 0.7993, + "step": 27800 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017829035577312838, + "loss": 1.002, + "step": 27850 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001782513797511771, + "loss": 1.0392, + "step": 27900 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001782124037292258, + "loss": 0.8991, + "step": 27950 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001781734277072745, + "loss": 0.8488, + "step": 28000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001781344516853232, + "loss": 0.8418, + "step": 28050 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001780954756633719, + "loss": 1.02, + "step": 28100 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001780564996414206, + "loss": 1.0404, + "step": 28150 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001780175236194693, + "loss": 0.9571, + "step": 28200 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017797854759751803, + "loss": 0.7724, + "step": 28250 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017793957157556672, + "loss": 1.0129, + "step": 28300 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017790059555361542, + "loss": 0.8916, + "step": 28350 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017786161953166414, + "loss": 0.9504, + "step": 28400 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017782264350971283, + "loss": 0.8393, + "step": 28450 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017778366748776153, + "loss": 0.7675, + "step": 28500 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017774469146581023, + "loss": 0.8273, + "step": 28550 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017770571544385895, + "loss": 0.8967, + "step": 28600 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017766673942190764, + "loss": 1.084, + "step": 28650 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017762776339995634, + "loss": 0.7741, + "step": 28700 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017758878737800506, + "loss": 1.1056, + "step": 28750 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017754981135605378, + "loss": 1.183, + "step": 28800 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017751083533410245, + "loss": 1.0375, + "step": 28850 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017747185931215118, + "loss": 1.2414, + "step": 28900 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017743288329019987, + "loss": 1.2209, + "step": 28950 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001773939072682486, + "loss": 0.8581, + "step": 29000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001773549312462973, + "loss": 1.0284, + "step": 29050 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017731595522434598, + "loss": 0.8502, + "step": 29100 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001772769792023947, + "loss": 0.6947, + "step": 29150 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001772380031804434, + "loss": 0.7579, + "step": 29200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001771990271584921, + "loss": 0.9771, + "step": 29250 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017716005113654082, + "loss": 0.8661, + "step": 29300 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017712107511458952, + "loss": 0.8433, + "step": 29350 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001770820990926382, + "loss": 0.9419, + "step": 29400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001770431230706869, + "loss": 1.076, + "step": 29450 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017700414704873563, + "loss": 0.9966, + "step": 29500 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017696517102678433, + "loss": 0.7618, + "step": 29550 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017692619500483302, + "loss": 0.8269, + "step": 29600 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017688721898288174, + "loss": 0.8109, + "step": 29650 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017684824296093047, + "loss": 0.7426, + "step": 29700 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017680926693897913, + "loss": 0.9972, + "step": 29750 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017677029091702786, + "loss": 0.7991, + "step": 29800 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017673131489507655, + "loss": 0.7988, + "step": 29850 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017669233887312527, + "loss": 0.7364, + "step": 29900 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017665336285117397, + "loss": 1.0258, + "step": 29950 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017661438682922267, + "loss": 1.0606, + "step": 30000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001765754108072714, + "loss": 0.909, + "step": 30050 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017653643478532008, + "loss": 0.8428, + "step": 30100 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017649745876336878, + "loss": 0.8707, + "step": 30150 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001764584827414175, + "loss": 0.7147, + "step": 30200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001764195067194662, + "loss": 0.89, + "step": 30250 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001763805306975149, + "loss": 0.847, + "step": 30300 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001763415546755636, + "loss": 0.7826, + "step": 30350 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001763025786536123, + "loss": 0.8614, + "step": 30400 + }, + { + "epoch": 0.24, + "learning_rate": 0.000176263602631661, + "loss": 0.8078, + "step": 30450 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001762246266097097, + "loss": 0.872, + "step": 30500 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017618565058775842, + "loss": 0.8734, + "step": 30550 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017614667456580715, + "loss": 0.8836, + "step": 30600 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017610769854385582, + "loss": 1.042, + "step": 30650 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017606872252190454, + "loss": 0.8561, + "step": 30700 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017602974649995323, + "loss": 0.6824, + "step": 30750 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017599077047800196, + "loss": 0.9277, + "step": 30800 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017595179445605065, + "loss": 0.9887, + "step": 30850 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017591281843409935, + "loss": 0.7925, + "step": 30900 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017587384241214807, + "loss": 0.8944, + "step": 30950 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017583486639019674, + "loss": 1.2576, + "step": 31000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017579589036824546, + "loss": 1.0837, + "step": 31050 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017575691434629418, + "loss": 1.0205, + "step": 31100 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017571793832434288, + "loss": 1.1691, + "step": 31150 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017567896230239157, + "loss": 0.9541, + "step": 31200 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017563998628044027, + "loss": 0.7735, + "step": 31250 + }, + { + "epoch": 0.24, + "learning_rate": 0.000175601010258489, + "loss": 0.9219, + "step": 31300 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001755620342365377, + "loss": 0.6201, + "step": 31350 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017552305821458638, + "loss": 0.8204, + "step": 31400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001754840821926351, + "loss": 0.7272, + "step": 31450 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001754451061706838, + "loss": 0.864, + "step": 31500 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001754061301487325, + "loss": 0.9935, + "step": 31550 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017536715412678122, + "loss": 0.8651, + "step": 31600 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017532817810482992, + "loss": 0.8508, + "step": 31650 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001752892020828786, + "loss": 0.863, + "step": 31700 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017525022606092733, + "loss": 0.9272, + "step": 31750 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017521125003897603, + "loss": 0.9609, + "step": 31800 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017517227401702475, + "loss": 1.1736, + "step": 31850 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017513329799507342, + "loss": 0.8571, + "step": 31900 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017509432197312214, + "loss": 0.758, + "step": 31950 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017505534595117086, + "loss": 1.0157, + "step": 32000 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017501636992921956, + "loss": 0.762, + "step": 32050 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017497739390726826, + "loss": 0.7206, + "step": 32100 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017493841788531695, + "loss": 0.9902, + "step": 32150 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017489944186336567, + "loss": 0.8943, + "step": 32200 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017486046584141437, + "loss": 0.9721, + "step": 32250 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017482148981946306, + "loss": 0.9522, + "step": 32300 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001747825137975118, + "loss": 0.9819, + "step": 32350 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017474353777556048, + "loss": 1.0563, + "step": 32400 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017470456175360918, + "loss": 0.782, + "step": 32450 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001746655857316579, + "loss": 0.9609, + "step": 32500 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001746266097097066, + "loss": 0.9329, + "step": 32550 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001745876336877553, + "loss": 0.7019, + "step": 32600 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017454865766580401, + "loss": 0.9395, + "step": 32650 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001745096816438527, + "loss": 0.7248, + "step": 32700 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017447070562190143, + "loss": 0.7116, + "step": 32750 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001744317295999501, + "loss": 0.92, + "step": 32800 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017439275357799882, + "loss": 0.8105, + "step": 32850 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017435377755604755, + "loss": 0.8492, + "step": 32900 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017431480153409624, + "loss": 0.8305, + "step": 32950 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017427582551214494, + "loss": 0.882, + "step": 33000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016390108569805799, + "loss": 40.3284, + "step": 33050 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016384647311968892, + "loss": 41.3929, + "step": 33100 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001637918605413199, + "loss": 40.5771, + "step": 33150 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016373724796295083, + "loss": 40.1587, + "step": 33200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001636826353845818, + "loss": 38.4849, + "step": 33250 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016362802280621276, + "loss": 40.8953, + "step": 33300 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001635734102278437, + "loss": 41.1837, + "step": 33350 + }, + { + "epoch": 0.36, + "learning_rate": 0.00016351879764947463, + "loss": 41.4111, + "step": 33400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001634641850711056, + "loss": 39.7779, + "step": 33450 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016340957249273653, + "loss": 39.6051, + "step": 33500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001633549599143675, + "loss": 39.1987, + "step": 33550 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016330034733599844, + "loss": 36.4834, + "step": 33600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001632457347576294, + "loss": 38.9442, + "step": 33650 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016319112217926034, + "loss": 38.7699, + "step": 33700 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016313650960089128, + "loss": 38.1662, + "step": 33750 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016308189702252224, + "loss": 38.3107, + "step": 33800 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016302728444415318, + "loss": 37.137, + "step": 33850 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016297267186578414, + "loss": 39.8413, + "step": 33900 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016291805928741508, + "loss": 37.834, + "step": 33950 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016286344670904602, + "loss": 38.752, + "step": 34000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016280883413067698, + "loss": 38.9749, + "step": 34050 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016275422155230792, + "loss": 37.0203, + "step": 34100 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016269960897393888, + "loss": 37.8575, + "step": 34150 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016264499639556982, + "loss": 36.1197, + "step": 34200 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001625903838172008, + "loss": 38.9567, + "step": 34250 + }, + { + "epoch": 0.37, + "learning_rate": 0.00016253577123883172, + "loss": 36.921, + "step": 34300 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001624811586604627, + "loss": 37.7047, + "step": 34350 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016242654608209365, + "loss": 37.8749, + "step": 34400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001623719335037246, + "loss": 36.0547, + "step": 34450 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016231732092535553, + "loss": 35.7079, + "step": 34500 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001622627083469865, + "loss": 35.5162, + "step": 34550 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016220809576861743, + "loss": 35.6316, + "step": 34600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001621534831902484, + "loss": 37.1081, + "step": 34650 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016209887061187933, + "loss": 35.4266, + "step": 34700 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001620442580335103, + "loss": 35.8718, + "step": 34750 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016198964545514124, + "loss": 34.2143, + "step": 34800 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016193503287677217, + "loss": 34.0882, + "step": 34850 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016188042029840314, + "loss": 32.8758, + "step": 34900 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016182580772003408, + "loss": 32.0339, + "step": 34950 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016177119514166504, + "loss": 31.4164, + "step": 35000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016171658256329598, + "loss": 31.8205, + "step": 35050 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016166196998492694, + "loss": 32.6587, + "step": 35100 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016160735740655788, + "loss": 31.8695, + "step": 35150 + }, + { + "epoch": 0.38, + "learning_rate": 0.00016155274482818882, + "loss": 31.0461, + "step": 35200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016149813224981978, + "loss": 30.1198, + "step": 35250 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016144351967145072, + "loss": 28.9032, + "step": 35300 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016138890709308169, + "loss": 30.3631, + "step": 35350 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016133429451471265, + "loss": 29.2617, + "step": 35400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001612796819363436, + "loss": 28.4782, + "step": 35450 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016122506935797455, + "loss": 28.6378, + "step": 35500 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001611704567796055, + "loss": 28.3341, + "step": 35550 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016111584420123645, + "loss": 27.6153, + "step": 35600 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001610612316228674, + "loss": 26.5044, + "step": 35650 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016100661904449833, + "loss": 26.8876, + "step": 35700 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001609520064661293, + "loss": 26.9291, + "step": 35750 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016089739388776023, + "loss": 24.024, + "step": 35800 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001608427813093912, + "loss": 24.5533, + "step": 35850 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016078816873102213, + "loss": 24.6948, + "step": 35900 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001607335561526531, + "loss": 22.2483, + "step": 35950 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016067894357428404, + "loss": 21.7253, + "step": 36000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016062433099591497, + "loss": 20.7581, + "step": 36050 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016056971841754594, + "loss": 18.6484, + "step": 36100 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016051510583917688, + "loss": 19.3484, + "step": 36150 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016046049326080784, + "loss": 15.8305, + "step": 36200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016040588068243878, + "loss": 16.644, + "step": 36250 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016035126810406972, + "loss": 16.1415, + "step": 36300 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016029665552570068, + "loss": 16.2331, + "step": 36350 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016024204294733162, + "loss": 13.7222, + "step": 36400 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016018743036896258, + "loss": 13.1968, + "step": 36450 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016013281779059355, + "loss": 13.7183, + "step": 36500 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016007820521222449, + "loss": 13.6719, + "step": 36550 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016002359263385545, + "loss": 12.565, + "step": 36600 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001599689800554864, + "loss": 11.7014, + "step": 36650 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015991436747711735, + "loss": 11.9391, + "step": 36700 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001598597548987483, + "loss": 10.8187, + "step": 36750 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015980514232037923, + "loss": 9.9151, + "step": 36800 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001597505297420102, + "loss": 8.8924, + "step": 36850 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015969591716364113, + "loss": 7.7144, + "step": 36900 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001596413045852721, + "loss": 6.7915, + "step": 36950 + }, + { + "epoch": 0.4, + "learning_rate": 0.00015958669200690303, + "loss": 6.1585, + "step": 37000 + }, + { + "epoch": 0.4, + "learning_rate": 0.000159532079428534, + "loss": 6.4101, + "step": 37050 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015947746685016494, + "loss": 4.6158, + "step": 37100 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015942285427179587, + "loss": 4.76, + "step": 37150 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015936824169342684, + "loss": 4.0994, + "step": 37200 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015931362911505778, + "loss": 4.7396, + "step": 37250 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015925901653668874, + "loss": 3.6542, + "step": 37300 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015920440395831968, + "loss": 3.4333, + "step": 37350 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015914979137995064, + "loss": 4.575, + "step": 37400 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015909517880158158, + "loss": 3.3926, + "step": 37450 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015904056622321252, + "loss": 3.3063, + "step": 37500 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015898595364484348, + "loss": 2.9068, + "step": 37550 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015893134106647445, + "loss": 2.9475, + "step": 37600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015887672848810538, + "loss": 2.94, + "step": 37650 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015882211590973635, + "loss": 3.2924, + "step": 37700 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001587675033313673, + "loss": 3.4012, + "step": 37750 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015871289075299825, + "loss": 2.8093, + "step": 37800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001586582781746292, + "loss": 3.115, + "step": 37850 + }, + { + "epoch": 0.41, + "learning_rate": 0.00015860366559626015, + "loss": 2.4926, + "step": 37900 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001585490530178911, + "loss": 2.3319, + "step": 37950 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015849444043952203, + "loss": 2.4095, + "step": 38000 + }, + { + "epoch": 0.42, + "learning_rate": 0.000158439827861153, + "loss": 2.563, + "step": 38050 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015838521528278393, + "loss": 2.5545, + "step": 38100 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001583306027044149, + "loss": 2.0663, + "step": 38150 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015827599012604583, + "loss": 2.0732, + "step": 38200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001582213775476768, + "loss": 2.9127, + "step": 38250 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015816676496930774, + "loss": 2.2365, + "step": 38300 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015811215239093867, + "loss": 3.9376, + "step": 38350 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015805753981256964, + "loss": 2.0433, + "step": 38400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015800292723420058, + "loss": 2.1487, + "step": 38450 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015794831465583154, + "loss": 1.8283, + "step": 38500 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015789370207746248, + "loss": 1.5619, + "step": 38550 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015783908949909342, + "loss": 1.6508, + "step": 38600 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015778447692072438, + "loss": 1.8076, + "step": 38650 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015772986434235535, + "loss": 1.5081, + "step": 38700 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001576752517639863, + "loss": 1.7372, + "step": 38750 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015762063918561725, + "loss": 1.504, + "step": 38800 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015756602660724819, + "loss": 1.4685, + "step": 38850 + }, + { + "epoch": 0.42, + "learning_rate": 0.00015751141402887915, + "loss": 1.366, + "step": 38900 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001574568014505101, + "loss": 1.3556, + "step": 38950 + }, + { + "epoch": 0.43, + "learning_rate": 0.00015740218887214105, + "loss": 1.328, + "step": 39000 + }, + { + "epoch": 0.43, + "learning_rate": 0.000157347576293772, + "loss": 1.672, + "step": 39050 + }, + { + "epoch": 0.43, + "learning_rate": 0.00015729296371540293, + "loss": 1.2776, + "step": 39100 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001572383511370339, + "loss": 1.619, + "step": 39150 + }, + { + "epoch": 0.43, + "learning_rate": 0.00015718373855866483, + "loss": 1.4484, + "step": 39200 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001571291259802958, + "loss": 1.4561, + "step": 39250 + }, + { + "epoch": 0.43, + "learning_rate": 0.00015707451340192673, + "loss": 1.5445, + "step": 39300 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001570199008235577, + "loss": 1.6477, + "step": 39350 + }, + { + "epoch": 0.43, + "learning_rate": 0.00015696528824518864, + "loss": 1.483, + "step": 39400 + }, + { + "epoch": 0.43, + "learning_rate": 0.00015691067566681957, + "loss": 1.4913, + "step": 39450 + }, + { + "epoch": 0.43, + "learning_rate": 0.00015685606308845054, + "loss": 1.2746, + "step": 39500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00015680145051008148, + "loss": 1.4588, + "step": 39550 + }, + { + "epoch": 0.43, + "learning_rate": 0.00015674683793171244, + "loss": 1.3793, + "step": 39600 + }, + { + "epoch": 0.43, + "learning_rate": 0.00015669222535334338, + "loss": 1.4776, + "step": 39650 + }, + { + "epoch": 0.43, + "learning_rate": 0.00015663761277497434, + "loss": 1.7906, + "step": 39700 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001565830001966053, + "loss": 1.4083, + "step": 39750 + }, + { + "epoch": 0.43, + "learning_rate": 0.00015652838761823624, + "loss": 1.5248, + "step": 39800 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001564737750398672, + "loss": 1.2159, + "step": 39850 + }, + { + "epoch": 0.44, + "learning_rate": 0.00015641916246149815, + "loss": 1.4073, + "step": 39900 + }, + { + "epoch": 0.44, + "learning_rate": 0.00015636454988312908, + "loss": 1.2702, + "step": 39950 + }, + { + "epoch": 0.44, + "learning_rate": 0.00015630993730476005, + "loss": 1.33, + "step": 40000 + }, + { + "epoch": 0.44, + "learning_rate": 0.000156255324726391, + "loss": 1.4365, + "step": 40050 + }, + { + "epoch": 0.44, + "learning_rate": 0.00015620071214802195, + "loss": 1.2484, + "step": 40100 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001561460995696529, + "loss": 1.2985, + "step": 40150 + }, + { + "epoch": 0.44, + "learning_rate": 0.00015609148699128385, + "loss": 1.3169, + "step": 40200 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001560368744129148, + "loss": 1.2415, + "step": 40250 + }, + { + "epoch": 0.44, + "learning_rate": 0.00015598226183454573, + "loss": 1.0357, + "step": 40300 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001559276492561767, + "loss": 1.3613, + "step": 40350 + }, + { + "epoch": 0.44, + "learning_rate": 0.00015587303667780763, + "loss": 1.1524, + "step": 40400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001558184240994386, + "loss": 1.4132, + "step": 40450 + }, + { + "epoch": 0.44, + "learning_rate": 0.00015576381152106953, + "loss": 1.4276, + "step": 40500 + } + ], + "logging_steps": 50, + "max_steps": 183108, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 500, + "total_flos": 2.90367192517632e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}