{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9998665421059656, "global_step": 3746, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0002, "loss": 10.6012, "step": 20 }, { "epoch": 0.02, "learning_rate": 0.0004, "loss": 3.651, "step": 40 }, { "epoch": 0.03, "learning_rate": 0.00059, "loss": 3.3911, "step": 60 }, { "epoch": 0.04, "learning_rate": 0.0007491827839825661, "loss": 3.0481, "step": 80 }, { "epoch": 0.05, "learning_rate": 0.0007453010078997548, "loss": 3.4039, "step": 100 }, { "epoch": 0.05, "eval_loss": Infinity, "eval_runtime": 1260.1628, "eval_samples_per_second": 21.123, "eval_steps_per_second": 0.66, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.06, "learning_rate": 0.0007412149278125851, "loss": 3.0553, "step": 120 }, { "epoch": 0.07, "learning_rate": 0.0007371288477254155, "loss": 3.2191, "step": 140 }, { "epoch": 0.09, "learning_rate": 0.0007332470716426042, "loss": 3.4094, "step": 160 }, { "epoch": 0.1, "learning_rate": 0.0007291609915554345, "loss": 3.0607, "step": 180 }, { "epoch": 0.11, "learning_rate": 0.0007252792154726233, "loss": 3.4396, "step": 200 }, { "epoch": 0.11, "eval_loss": Infinity, "eval_runtime": 1235.1686, "eval_samples_per_second": 21.55, "eval_steps_per_second": 0.674, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.12, "learning_rate": 0.0007211931353854536, "loss": 3.1198, "step": 220 }, { "epoch": 0.13, "learning_rate": 0.0007171070552982838, "loss": 3.1801, "step": 240 }, { "epoch": 0.14, "learning_rate": 0.0007132252792154726, "loss": 3.3954, "step": 260 }, { "epoch": 0.15, "learning_rate": 0.000709139199128303, "loss": 3.0566, "step": 280 }, { "epoch": 0.16, "learning_rate": 0.0007052574230454917, "loss": 3.483, "step": 300 }, { "epoch": 0.16, "eval_loss": Infinity, "eval_runtime": 1262.2253, "eval_samples_per_second": 21.088, "eval_steps_per_second": 0.659, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.17, "learning_rate": 0.000701171342958322, "loss": 3.1616, "step": 320 }, { "epoch": 0.18, "learning_rate": 0.0006970852628711522, "loss": 3.2017, "step": 340 }, { "epoch": 0.19, "learning_rate": 0.0006932034867883411, "loss": 3.4474, "step": 360 }, { "epoch": 0.2, "learning_rate": 0.0006891174067011714, "loss": 3.0656, "step": 380 }, { "epoch": 0.21, "learning_rate": 0.0006852356306183601, "loss": 3.5014, "step": 400 }, { "epoch": 0.21, "eval_loss": Infinity, "eval_runtime": 1255.7751, "eval_samples_per_second": 21.196, "eval_steps_per_second": 0.663, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.22, "learning_rate": 0.0006811495505311904, "loss": 3.1757, "step": 420 }, { "epoch": 0.23, "learning_rate": 0.0006770634704440206, "loss": 3.2038, "step": 440 }, { "epoch": 0.25, "learning_rate": 0.0006731816943612095, "loss": 3.4072, "step": 460 }, { "epoch": 0.26, "learning_rate": 0.0006690956142740398, "loss": 3.0498, "step": 480 }, { "epoch": 0.27, "learning_rate": 0.00066500953418687, "loss": 3.331, "step": 500 }, { "epoch": 0.27, "eval_loss": Infinity, "eval_runtime": 1239.2922, "eval_samples_per_second": 21.478, "eval_steps_per_second": 0.671, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.28, "learning_rate": 0.0006609234540997004, "loss": 3.0833, "step": 520 }, { "epoch": 0.29, "learning_rate": 0.0006568373740125306, "loss": 3.1989, "step": 540 }, { "epoch": 0.3, "learning_rate": 0.0006531599019340779, "loss": 3.5001, "step": 560 }, { "epoch": 0.31, "learning_rate": 0.0006490738218469082, "loss": 3.0685, "step": 580 }, { "epoch": 0.32, "learning_rate": 0.000645192045764097, "loss": 3.4809, "step": 600 }, { "epoch": 0.32, "eval_loss": Infinity, "eval_runtime": 1232.3119, "eval_samples_per_second": 21.6, "eval_steps_per_second": 0.675, "eval_wer": 1.0, "step": 600 }, { "epoch": 0.33, "learning_rate": 0.0006411059656769273, "loss": 3.1877, "step": 620 }, { "epoch": 0.34, "learning_rate": 0.0006370198855897576, "loss": 3.2365, "step": 640 }, { "epoch": 0.35, "learning_rate": 0.0006333424135113049, "loss": 3.67, "step": 660 }, { "epoch": 0.36, "learning_rate": 0.0006292563334241351, "loss": 3.086, "step": 680 }, { "epoch": 0.37, "learning_rate": 0.0006253745573413239, "loss": 3.4678, "step": 700 }, { "epoch": 0.37, "eval_loss": Infinity, "eval_runtime": 1236.8236, "eval_samples_per_second": 21.521, "eval_steps_per_second": 0.673, "eval_wer": 1.0, "step": 700 }, { "epoch": 0.38, "learning_rate": 0.0006212884772541542, "loss": 3.2344, "step": 720 }, { "epoch": 0.4, "learning_rate": 0.0006172023971669845, "loss": 3.2507, "step": 740 }, { "epoch": 0.41, "learning_rate": 0.0006131163170798147, "loss": 3.4283, "step": 760 }, { "epoch": 0.42, "learning_rate": 0.0006090302369926451, "loss": 3.0802, "step": 780 }, { "epoch": 0.43, "learning_rate": 0.0006051484609098339, "loss": 3.4596, "step": 800 }, { "epoch": 0.43, "eval_loss": Infinity, "eval_runtime": 1233.785, "eval_samples_per_second": 21.574, "eval_steps_per_second": 0.674, "eval_wer": 1.0, "step": 800 }, { "epoch": 0.44, "learning_rate": 0.0006010623808226641, "loss": 3.2227, "step": 820 }, { "epoch": 0.45, "learning_rate": 0.0005969763007354945, "loss": 3.2357, "step": 840 }, { "epoch": 0.46, "learning_rate": 0.0005930945246526832, "loss": 3.5563, "step": 860 }, { "epoch": 0.47, "learning_rate": 0.0005890084445655135, "loss": 3.0845, "step": 880 }, { "epoch": 0.48, "learning_rate": 0.0005851266684827023, "loss": 3.4644, "step": 900 }, { "epoch": 0.48, "eval_loss": Infinity, "eval_runtime": 1235.1488, "eval_samples_per_second": 21.55, "eval_steps_per_second": 0.674, "eval_wer": 1.0, "step": 900 }, { "epoch": 0.49, "learning_rate": 0.0005810405883955325, "loss": 3.2092, "step": 920 }, { "epoch": 0.5, "learning_rate": 0.0005769545083083629, "loss": 3.2157, "step": 940 }, { "epoch": 0.51, "learning_rate": 0.0005730727322255517, "loss": 3.4679, "step": 960 }, { "epoch": 0.52, "learning_rate": 0.0005689866521383819, "loss": 3.0662, "step": 980 }, { "epoch": 0.53, "learning_rate": 0.0005651048760555707, "loss": 3.4671, "step": 1000 }, { "epoch": 0.53, "eval_loss": Infinity, "eval_runtime": 1241.2249, "eval_samples_per_second": 21.445, "eval_steps_per_second": 0.67, "eval_wer": 1.0, "step": 1000 }, { "epoch": 0.54, "learning_rate": 0.0005610187959684009, "loss": 3.2428, "step": 1020 }, { "epoch": 0.56, "learning_rate": 0.0005569327158812314, "loss": 3.2165, "step": 1040 }, { "epoch": 0.57, "learning_rate": 0.0005530509397984201, "loss": 3.5106, "step": 1060 }, { "epoch": 0.58, "learning_rate": 0.0005489648597112503, "loss": 3.1137, "step": 1080 }, { "epoch": 0.59, "learning_rate": 0.0005452873876327976, "loss": 3.6005, "step": 1100 }, { "epoch": 0.59, "eval_loss": Infinity, "eval_runtime": 1236.2955, "eval_samples_per_second": 21.53, "eval_steps_per_second": 0.673, "eval_wer": 1.0, "step": 1100 }, { "epoch": 0.6, "learning_rate": 0.0005412013075456279, "loss": 3.2702, "step": 1120 }, { "epoch": 0.61, "learning_rate": 0.0005371152274584582, "loss": 3.6768, "step": 1140 }, { "epoch": 0.62, "learning_rate": 0.000533233451375647, "loss": 3.6313, "step": 1160 }, { "epoch": 0.63, "learning_rate": 0.0005291473712884773, "loss": 3.2456, "step": 1180 }, { "epoch": 0.64, "learning_rate": 0.000525265595205666, "loss": 3.9182, "step": 1200 }, { "epoch": 0.64, "eval_loss": Infinity, "eval_runtime": 1229.4316, "eval_samples_per_second": 21.651, "eval_steps_per_second": 0.677, "eval_wer": 1.0, "step": 1200 }, { "epoch": 0.65, "learning_rate": 0.0005211795151184963, "loss": 3.3805, "step": 1220 }, { "epoch": 0.66, "learning_rate": 0.0005170934350313267, "loss": 3.4687, "step": 1240 }, { "epoch": 0.67, "learning_rate": 0.000513007354944157, "loss": 3.4473, "step": 1260 }, { "epoch": 0.68, "learning_rate": 0.0005089212748569872, "loss": 3.2232, "step": 1280 }, { "epoch": 0.69, "learning_rate": 0.000505039498774176, "loss": 3.6466, "step": 1300 }, { "epoch": 0.69, "eval_loss": Infinity, "eval_runtime": 1237.3739, "eval_samples_per_second": 21.512, "eval_steps_per_second": 0.672, "eval_wer": 1.0, "step": 1300 }, { "epoch": 0.7, "learning_rate": 0.0005009534186870062, "loss": 3.3496, "step": 1320 }, { "epoch": 0.72, "learning_rate": 0.0004968673385998365, "loss": 3.3918, "step": 1340 }, { "epoch": 0.73, "learning_rate": 0.0004927812585126669, "loss": 3.3232, "step": 1360 }, { "epoch": 0.74, "learning_rate": 0.0004886951784254972, "loss": 3.288, "step": 1380 }, { "epoch": 0.75, "learning_rate": 0.00048481340234268587, "loss": 3.6932, "step": 1400 }, { "epoch": 0.75, "eval_loss": Infinity, "eval_runtime": 1236.694, "eval_samples_per_second": 21.524, "eval_steps_per_second": 0.673, "eval_wer": 1.0, "step": 1400 }, { "epoch": 0.76, "learning_rate": 0.00048072732225551624, "loss": 3.5411, "step": 1420 }, { "epoch": 0.77, "learning_rate": 0.0004766412421683465, "loss": 3.5658, "step": 1440 }, { "epoch": 0.78, "learning_rate": 0.00047275946608553524, "loss": 3.7666, "step": 1460 }, { "epoch": 0.79, "learning_rate": 0.0004686733859983656, "loss": 3.7564, "step": 1480 }, { "epoch": 0.8, "learning_rate": 0.00046479160991555436, "loss": 3.7939, "step": 1500 }, { "epoch": 0.8, "eval_loss": Infinity, "eval_runtime": 1235.7295, "eval_samples_per_second": 21.54, "eval_steps_per_second": 0.673, "eval_wer": 1.0, "step": 1500 }, { "epoch": 0.81, "learning_rate": 0.00046070552982838467, "loss": 3.7471, "step": 1520 }, { "epoch": 0.82, "learning_rate": 0.00045661944974121493, "loss": 3.7457, "step": 1540 }, { "epoch": 0.83, "learning_rate": 0.0004527376736584037, "loss": 3.798, "step": 1560 }, { "epoch": 0.84, "learning_rate": 0.00044865159357123405, "loss": 3.7611, "step": 1580 }, { "epoch": 0.85, "learning_rate": 0.0004447698174884228, "loss": 3.9284, "step": 1600 }, { "epoch": 0.85, "eval_loss": Infinity, "eval_runtime": 1251.8158, "eval_samples_per_second": 21.264, "eval_steps_per_second": 0.665, "eval_wer": 1.0, "step": 1600 }, { "epoch": 0.86, "learning_rate": 0.0004406837374012531, "loss": 3.7416, "step": 1620 }, { "epoch": 0.88, "learning_rate": 0.00043659765731408337, "loss": 3.7416, "step": 1640 }, { "epoch": 0.89, "learning_rate": 0.0004327158812312721, "loss": 3.8024, "step": 1660 }, { "epoch": 0.9, "learning_rate": 0.0004286298011441025, "loss": 3.7578, "step": 1680 }, { "epoch": 0.91, "learning_rate": 0.0004247480250612912, "loss": 3.7859, "step": 1700 }, { "epoch": 0.91, "eval_loss": Infinity, "eval_runtime": 1235.734, "eval_samples_per_second": 21.54, "eval_steps_per_second": 0.673, "eval_wer": 1.0, "step": 1700 }, { "epoch": 0.92, "learning_rate": 0.0004206619449741215, "loss": 3.7427, "step": 1720 }, { "epoch": 0.93, "learning_rate": 0.0004165758648869518, "loss": 3.751, "step": 1740 }, { "epoch": 0.94, "learning_rate": 0.00041248978479978206, "loss": 3.6701, "step": 1760 }, { "epoch": 0.95, "learning_rate": 0.0004084037047126123, "loss": 3.7613, "step": 1780 }, { "epoch": 0.96, "learning_rate": 0.00040472623263415966, "loss": 3.9363, "step": 1800 }, { "epoch": 0.96, "eval_loss": Infinity, "eval_runtime": 1236.9158, "eval_samples_per_second": 21.52, "eval_steps_per_second": 0.673, "eval_wer": 1.0, "step": 1800 }, { "epoch": 0.97, "learning_rate": 0.0004006401525469899, "loss": 3.7464, "step": 1820 }, { "epoch": 0.98, "learning_rate": 0.00039655407245982023, "loss": 3.7414, "step": 1840 }, { "epoch": 0.99, "learning_rate": 0.000392672296377009, "loss": 3.8036, "step": 1860 }, { "epoch": 1.0, "learning_rate": 0.00038879052029419783, "loss": 3.8768, "step": 1880 }, { "epoch": 1.01, "learning_rate": 0.0003847044402070281, "loss": 3.7573, "step": 1900 }, { "epoch": 1.01, "eval_loss": Infinity, "eval_runtime": 1232.3535, "eval_samples_per_second": 21.599, "eval_steps_per_second": 0.675, "eval_wer": 1.0, "step": 1900 }, { "epoch": 1.03, "learning_rate": 0.00038061836011985835, "loss": 3.6959, "step": 1920 }, { "epoch": 1.04, "learning_rate": 0.00037653228003268867, "loss": 3.7313, "step": 1940 }, { "epoch": 1.05, "learning_rate": 0.00037244619994551893, "loss": 3.7676, "step": 1960 }, { "epoch": 1.06, "learning_rate": 0.0003685644238627077, "loss": 3.7872, "step": 1980 }, { "epoch": 1.07, "learning_rate": 0.000364478343775538, "loss": 3.7553, "step": 2000 }, { "epoch": 1.07, "eval_loss": Infinity, "eval_runtime": 1239.2247, "eval_samples_per_second": 21.48, "eval_steps_per_second": 0.671, "eval_wer": 1.0, "step": 2000 }, { "epoch": 1.08, "learning_rate": 0.0003603922636883683, "loss": 3.694, "step": 2020 }, { "epoch": 1.09, "learning_rate": 0.0003565104876055571, "loss": 3.8389, "step": 2040 }, { "epoch": 1.1, "learning_rate": 0.00035242440751838736, "loss": 3.7573, "step": 2060 }, { "epoch": 1.11, "learning_rate": 0.0003483383274312177, "loss": 3.6641, "step": 2080 }, { "epoch": 1.12, "learning_rate": 0.00034425224734404794, "loss": 3.7606, "step": 2100 }, { "epoch": 1.12, "eval_loss": Infinity, "eval_runtime": 1237.7997, "eval_samples_per_second": 21.504, "eval_steps_per_second": 0.672, "eval_wer": 1.0, "step": 2100 }, { "epoch": 1.13, "learning_rate": 0.00034016616725687825, "loss": 3.6827, "step": 2120 }, { "epoch": 1.14, "learning_rate": 0.000336284391174067, "loss": 3.8375, "step": 2140 }, { "epoch": 1.15, "learning_rate": 0.0003321983110868973, "loss": 3.7527, "step": 2160 }, { "epoch": 1.16, "learning_rate": 0.0003283165350040861, "loss": 3.7881, "step": 2180 }, { "epoch": 1.17, "learning_rate": 0.0003242304549169164, "loss": 3.7514, "step": 2200 }, { "epoch": 1.17, "eval_loss": Infinity, "eval_runtime": 1237.076, "eval_samples_per_second": 21.517, "eval_steps_per_second": 0.673, "eval_wer": 1.0, "step": 2200 }, { "epoch": 1.19, "learning_rate": 0.0003201443748297467, "loss": 3.6727, "step": 2220 }, { "epoch": 1.2, "learning_rate": 0.00031626259874693543, "loss": 3.8424, "step": 2240 }, { "epoch": 1.21, "learning_rate": 0.00031217651865976574, "loss": 3.7594, "step": 2260 }, { "epoch": 1.22, "learning_rate": 0.00030829474257695454, "loss": 3.7862, "step": 2280 }, { "epoch": 1.23, "learning_rate": 0.0003042086624897848, "loss": 3.7472, "step": 2300 }, { "epoch": 1.23, "eval_loss": Infinity, "eval_runtime": 1236.6319, "eval_samples_per_second": 21.525, "eval_steps_per_second": 0.673, "eval_wer": 1.0, "step": 2300 }, { "epoch": 1.24, "learning_rate": 0.0003001225824026151, "loss": 3.6956, "step": 2320 }, { "epoch": 1.25, "learning_rate": 0.0002960365023154454, "loss": 3.7248, "step": 2340 }, { "epoch": 1.26, "learning_rate": 0.0002919504222282757, "loss": 3.7531, "step": 2360 }, { "epoch": 1.27, "learning_rate": 0.0002882729501498229, "loss": 3.9245, "step": 2380 }, { "epoch": 1.28, "learning_rate": 0.00028418687006265324, "loss": 3.7478, "step": 2400 }, { "epoch": 1.28, "eval_loss": Infinity, "eval_runtime": 1236.7725, "eval_samples_per_second": 21.522, "eval_steps_per_second": 0.673, "eval_wer": 1.0, "step": 2400 }, { "epoch": 1.29, "learning_rate": 0.00028010078997548355, "loss": 3.6826, "step": 2420 }, { "epoch": 1.3, "learning_rate": 0.0002760147098883138, "loss": 3.7236, "step": 2440 }, { "epoch": 1.31, "learning_rate": 0.00027192862980114413, "loss": 3.7609, "step": 2460 }, { "epoch": 1.32, "learning_rate": 0.00026804685371833287, "loss": 3.7846, "step": 2480 }, { "epoch": 1.33, "learning_rate": 0.0002639607736311632, "loss": 3.7496, "step": 2500 }, { "epoch": 1.33, "eval_loss": Infinity, "eval_runtime": 1233.5496, "eval_samples_per_second": 21.578, "eval_steps_per_second": 0.674, "eval_wer": 1.0, "step": 2500 }, { "epoch": 1.35, "learning_rate": 0.00025987469354399345, "loss": 3.6785, "step": 2520 }, { "epoch": 1.36, "learning_rate": 0.00025578861345682376, "loss": 3.7212, "step": 2540 }, { "epoch": 1.37, "learning_rate": 0.0002517025333696541, "loss": 3.7637, "step": 2560 }, { "epoch": 1.38, "learning_rate": 0.00024761645328248434, "loss": 3.6513, "step": 2580 }, { "epoch": 1.39, "learning_rate": 0.00024353037319531465, "loss": 3.7513, "step": 2600 }, { "epoch": 1.39, "eval_loss": Infinity, "eval_runtime": 1231.8132, "eval_samples_per_second": 21.609, "eval_steps_per_second": 0.675, "eval_wer": 1.0, "step": 2600 }, { "epoch": 1.4, "learning_rate": 0.00023944429310814494, "loss": 3.6818, "step": 2620 }, { "epoch": 1.41, "learning_rate": 0.00023556251702533368, "loss": 3.8417, "step": 2640 }, { "epoch": 1.42, "learning_rate": 0.000231476436938164, "loss": 3.7639, "step": 2660 }, { "epoch": 1.43, "learning_rate": 0.00022759466085535277, "loss": 3.7842, "step": 2680 }, { "epoch": 1.44, "learning_rate": 0.00022350858076818309, "loss": 3.7497, "step": 2700 }, { "epoch": 1.44, "eval_loss": Infinity, "eval_runtime": 1234.8437, "eval_samples_per_second": 21.556, "eval_steps_per_second": 0.674, "eval_wer": 1.0, "step": 2700 }, { "epoch": 1.45, "learning_rate": 0.00021942250068101335, "loss": 3.6795, "step": 2720 }, { "epoch": 1.46, "learning_rate": 0.00021554072459820212, "loss": 3.8421, "step": 2740 }, { "epoch": 1.47, "learning_rate": 0.00021145464451103243, "loss": 3.7614, "step": 2760 }, { "epoch": 1.48, "learning_rate": 0.0002075728684282212, "loss": 3.9259, "step": 2780 }, { "epoch": 1.49, "learning_rate": 0.00020348678834105147, "loss": 3.7539, "step": 2800 }, { "epoch": 1.49, "eval_loss": Infinity, "eval_runtime": 1240.1103, "eval_samples_per_second": 21.464, "eval_steps_per_second": 0.671, "eval_wer": 1.0, "step": 2800 }, { "epoch": 1.51, "learning_rate": 0.00019940070825388178, "loss": 3.6889, "step": 2820 }, { "epoch": 1.52, "learning_rate": 0.00019551893217107055, "loss": 3.843, "step": 2840 }, { "epoch": 1.53, "learning_rate": 0.00019143285208390087, "loss": 3.7666, "step": 2860 }, { "epoch": 1.54, "learning_rate": 0.00018755107600108964, "loss": 3.786, "step": 2880 }, { "epoch": 1.55, "learning_rate": 0.0001834649959139199, "loss": 3.7581, "step": 2900 }, { "epoch": 1.55, "eval_loss": Infinity, "eval_runtime": 1231.1699, "eval_samples_per_second": 21.62, "eval_steps_per_second": 0.676, "eval_wer": 1.0, "step": 2900 }, { "epoch": 1.56, "learning_rate": 0.00017937891582675021, "loss": 3.672, "step": 2920 }, { "epoch": 1.57, "learning_rate": 0.00017549713974393899, "loss": 3.8539, "step": 2940 }, { "epoch": 1.58, "learning_rate": 0.00017141105965676927, "loss": 3.7652, "step": 2960 }, { "epoch": 1.59, "learning_rate": 0.00016752928357395807, "loss": 3.7825, "step": 2980 }, { "epoch": 1.6, "learning_rate": 0.00016344320348678833, "loss": 3.7572, "step": 3000 }, { "epoch": 1.6, "eval_loss": Infinity, "eval_runtime": 1241.8499, "eval_samples_per_second": 21.434, "eval_steps_per_second": 0.67, "eval_wer": 1.0, "step": 3000 }, { "epoch": 1.61, "learning_rate": 0.00015935712339961862, "loss": 3.6906, "step": 3020 }, { "epoch": 1.62, "learning_rate": 0.00015547534731680742, "loss": 3.845, "step": 3040 }, { "epoch": 1.63, "learning_rate": 0.0001513892672296377, "loss": 3.763, "step": 3060 }, { "epoch": 1.64, "learning_rate": 0.00014750749114682648, "loss": 3.779, "step": 3080 }, { "epoch": 1.66, "learning_rate": 0.0001434214110596568, "loss": 3.7589, "step": 3100 }, { "epoch": 1.66, "eval_loss": Infinity, "eval_runtime": 1236.916, "eval_samples_per_second": 21.52, "eval_steps_per_second": 0.673, "eval_wer": 1.0, "step": 3100 }, { "epoch": 1.67, "learning_rate": 0.00013933533097248705, "loss": 3.6948, "step": 3120 }, { "epoch": 1.68, "learning_rate": 0.00013524925088531734, "loss": 3.7211, "step": 3140 }, { "epoch": 1.69, "learning_rate": 0.00013116317079814763, "loss": 3.7588, "step": 3160 }, { "epoch": 1.7, "learning_rate": 0.00012728139471533643, "loss": 3.7898, "step": 3180 }, { "epoch": 1.71, "learning_rate": 0.00012319531462816672, "loss": 3.7592, "step": 3200 }, { "epoch": 1.71, "eval_loss": Infinity, "eval_runtime": 1232.1567, "eval_samples_per_second": 21.603, "eval_steps_per_second": 0.675, "eval_wer": 1.0, "step": 3200 }, { "epoch": 1.72, "learning_rate": 0.000119109234540997, "loss": 3.688, "step": 3220 }, { "epoch": 1.73, "learning_rate": 0.00011522745845818579, "loss": 3.8419, "step": 3240 }, { "epoch": 1.74, "learning_rate": 0.00011114137837101608, "loss": 3.7578, "step": 3260 }, { "epoch": 1.75, "learning_rate": 0.00010705529828384636, "loss": 3.6534, "step": 3280 }, { "epoch": 1.76, "learning_rate": 0.00010296921819667667, "loss": 3.7531, "step": 3300 }, { "epoch": 1.76, "eval_loss": Infinity, "eval_runtime": 1237.4955, "eval_samples_per_second": 21.51, "eval_steps_per_second": 0.672, "eval_wer": 1.0, "step": 3300 }, { "epoch": 1.77, "learning_rate": 9.888313810950695e-05, "loss": 3.691, "step": 3320 }, { "epoch": 1.78, "learning_rate": 9.500136202669572e-05, "loss": 3.8424, "step": 3340 }, { "epoch": 1.79, "learning_rate": 9.091528193952601e-05, "loss": 3.7698, "step": 3360 }, { "epoch": 1.8, "learning_rate": 8.70335058567148e-05, "loss": 3.7836, "step": 3380 }, { "epoch": 1.82, "learning_rate": 8.294742576954509e-05, "loss": 3.7567, "step": 3400 }, { "epoch": 1.82, "eval_loss": Infinity, "eval_runtime": 1242.7888, "eval_samples_per_second": 21.418, "eval_steps_per_second": 0.669, "eval_wer": 1.0, "step": 3400 }, { "epoch": 1.83, "learning_rate": 7.886134568237537e-05, "loss": 3.6741, "step": 3420 }, { "epoch": 1.84, "learning_rate": 7.497956959956416e-05, "loss": 3.8442, "step": 3440 }, { "epoch": 1.85, "learning_rate": 7.089348951239445e-05, "loss": 3.7663, "step": 3460 }, { "epoch": 1.86, "learning_rate": 6.701171342958322e-05, "loss": 3.7789, "step": 3480 }, { "epoch": 1.87, "learning_rate": 6.292563334241352e-05, "loss": 3.7613, "step": 3500 }, { "epoch": 1.87, "eval_loss": Infinity, "eval_runtime": 1244.1733, "eval_samples_per_second": 21.394, "eval_steps_per_second": 0.669, "eval_wer": 1.0, "step": 3500 }, { "epoch": 1.88, "learning_rate": 5.88395532552438e-05, "loss": 3.6884, "step": 3520 }, { "epoch": 1.89, "learning_rate": 5.4957777172432585e-05, "loss": 3.8268, "step": 3540 }, { "epoch": 1.9, "learning_rate": 5.0871697085262866e-05, "loss": 3.7517, "step": 3560 }, { "epoch": 1.91, "learning_rate": 4.6989921002451645e-05, "loss": 3.7802, "step": 3580 }, { "epoch": 1.92, "learning_rate": 4.290384091528194e-05, "loss": 3.7516, "step": 3600 }, { "epoch": 1.92, "eval_loss": Infinity, "eval_runtime": 1253.9236, "eval_samples_per_second": 21.228, "eval_steps_per_second": 0.664, "eval_wer": 1.0, "step": 3600 }, { "epoch": 1.93, "learning_rate": 3.8817760828112234e-05, "loss": 3.6719, "step": 3620 }, { "epoch": 1.94, "learning_rate": 3.4935984745301006e-05, "loss": 3.8471, "step": 3640 }, { "epoch": 1.95, "learning_rate": 3.08499046581313e-05, "loss": 3.7568, "step": 3660 }, { "epoch": 1.96, "learning_rate": 2.6968128575320075e-05, "loss": 3.7887, "step": 3680 }, { "epoch": 1.98, "learning_rate": 2.2882048488150366e-05, "loss": 3.7581, "step": 3700 }, { "epoch": 1.98, "eval_loss": Infinity, "eval_runtime": 1242.0033, "eval_samples_per_second": 21.432, "eval_steps_per_second": 0.67, "eval_wer": 1.0, "step": 3700 }, { "epoch": 1.99, "learning_rate": 1.879596840098066e-05, "loss": 3.6785, "step": 3720 }, { "epoch": 2.0, "learning_rate": 1.4914192318169436e-05, "loss": 3.8524, "step": 3740 }, { "epoch": 2.0, "step": 3746, "total_flos": 7.191358836520801e+19, "train_loss": 3.6355768978691203, "train_runtime": 71670.0084, "train_samples_per_second": 6.691, "train_steps_per_second": 0.052 } ], "max_steps": 3746, "num_train_epochs": 2, "total_flos": 7.191358836520801e+19, "trial_name": null, "trial_params": null }