{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.99892202659001, "global_step": 34750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 3.6e-06, "loss": 27.8845, "step": 100 }, { "epoch": 0.29, "learning_rate": 7.35e-06, "loss": 5.2607, "step": 200 }, { "epoch": 0.43, "learning_rate": 1.1099999999999999e-05, "loss": 4.8397, "step": 300 }, { "epoch": 0.57, "learning_rate": 1.485e-05, "loss": 4.6751, "step": 400 }, { "epoch": 0.72, "learning_rate": 1.8599999999999998e-05, "loss": 4.603, "step": 500 }, { "epoch": 0.72, "eval_loss": 4.657182693481445, "eval_runtime": 61.7478, "eval_samples_per_second": 7.385, "eval_steps_per_second": 0.923, "eval_wer": 0.9984898822108125, "step": 500 }, { "epoch": 0.86, "learning_rate": 2.2349999999999998e-05, "loss": 4.5246, "step": 600 }, { "epoch": 1.01, "learning_rate": 2.6099999999999997e-05, "loss": 3.8527, "step": 700 }, { "epoch": 1.15, "learning_rate": 2.985e-05, "loss": 3.0344, "step": 800 }, { "epoch": 1.29, "learning_rate": 3.36e-05, "loss": 2.7616, "step": 900 }, { "epoch": 1.44, "learning_rate": 3.735e-05, "loss": 2.6314, "step": 1000 }, { "epoch": 1.44, "eval_loss": 2.0424230098724365, "eval_runtime": 57.8725, "eval_samples_per_second": 7.879, "eval_steps_per_second": 0.985, "eval_wer": 0.9255511929930534, "step": 1000 }, { "epoch": 1.58, "learning_rate": 4.11e-05, "loss": 2.4936, "step": 1100 }, { "epoch": 1.73, "learning_rate": 4.484999999999999e-05, "loss": 2.4557, "step": 1200 }, { "epoch": 1.87, "learning_rate": 4.8599999999999995e-05, "loss": 2.3667, "step": 1300 }, { "epoch": 2.01, "learning_rate": 5.234999999999999e-05, "loss": 2.3147, "step": 1400 }, { "epoch": 2.16, "learning_rate": 5.6099999999999995e-05, "loss": 2.2708, "step": 1500 }, { "epoch": 2.16, "eval_loss": 0.9889274835586548, "eval_runtime": 59.1503, "eval_samples_per_second": 7.709, "eval_steps_per_second": 0.964, "eval_wer": 0.6988825128360012, "step": 1500 }, { "epoch": 2.3, "learning_rate": 5.985e-05, "loss": 2.1797, "step": 1600 }, { "epoch": 2.45, "learning_rate": 6.359999999999999e-05, "loss": 2.1628, "step": 1700 }, { "epoch": 2.59, "learning_rate": 6.735e-05, "loss": 2.1662, "step": 1800 }, { "epoch": 2.73, "learning_rate": 7.11e-05, "loss": 2.1623, "step": 1900 }, { "epoch": 2.88, "learning_rate": 7.484999999999999e-05, "loss": 2.1769, "step": 2000 }, { "epoch": 2.88, "eval_loss": 0.8366215825080872, "eval_runtime": 56.673, "eval_samples_per_second": 8.046, "eval_steps_per_second": 1.006, "eval_wer": 0.6312292358803987, "step": 2000 }, { "epoch": 3.02, "learning_rate": 7.478015267175572e-05, "loss": 2.1913, "step": 2100 }, { "epoch": 3.17, "learning_rate": 7.455114503816793e-05, "loss": 2.1906, "step": 2200 }, { "epoch": 3.31, "learning_rate": 7.432213740458014e-05, "loss": 2.1964, "step": 2300 }, { "epoch": 3.45, "learning_rate": 7.409312977099236e-05, "loss": 2.1548, "step": 2400 }, { "epoch": 3.6, "learning_rate": 7.386412213740458e-05, "loss": 2.1142, "step": 2500 }, { "epoch": 3.6, "eval_loss": 0.7555108070373535, "eval_runtime": 57.6601, "eval_samples_per_second": 7.908, "eval_steps_per_second": 0.989, "eval_wer": 0.5998187858652975, "step": 2500 }, { "epoch": 3.74, "learning_rate": 7.363511450381678e-05, "loss": 2.1028, "step": 2600 }, { "epoch": 3.88, "learning_rate": 7.3406106870229e-05, "loss": 2.0524, "step": 2700 }, { "epoch": 4.03, "learning_rate": 7.317709923664121e-05, "loss": 2.0426, "step": 2800 }, { "epoch": 4.17, "learning_rate": 7.29503816793893e-05, "loss": 1.9613, "step": 2900 }, { "epoch": 4.32, "learning_rate": 7.272137404580152e-05, "loss": 2.0084, "step": 3000 }, { "epoch": 4.32, "eval_loss": 0.7143925428390503, "eval_runtime": 57.4263, "eval_samples_per_second": 7.941, "eval_steps_per_second": 0.993, "eval_wer": 0.6002718212020538, "step": 3000 }, { "epoch": 4.46, "learning_rate": 7.249236641221373e-05, "loss": 2.0063, "step": 3100 }, { "epoch": 4.6, "learning_rate": 7.226335877862594e-05, "loss": 1.9758, "step": 3200 }, { "epoch": 4.75, "learning_rate": 7.203435114503816e-05, "loss": 1.9313, "step": 3300 }, { "epoch": 4.89, "learning_rate": 7.180534351145037e-05, "loss": 1.9009, "step": 3400 }, { "epoch": 5.04, "learning_rate": 7.15763358778626e-05, "loss": 1.9272, "step": 3500 }, { "epoch": 5.04, "eval_loss": 0.631069004535675, "eval_runtime": 58.3633, "eval_samples_per_second": 7.813, "eval_steps_per_second": 0.977, "eval_wer": 0.5460585925702205, "step": 3500 }, { "epoch": 5.18, "learning_rate": 7.13473282442748e-05, "loss": 1.8728, "step": 3600 }, { "epoch": 5.32, "learning_rate": 7.111832061068701e-05, "loss": 1.8675, "step": 3700 }, { "epoch": 5.47, "learning_rate": 7.088931297709923e-05, "loss": 1.8549, "step": 3800 }, { "epoch": 5.61, "learning_rate": 7.066030534351145e-05, "loss": 1.8491, "step": 3900 }, { "epoch": 5.75, "learning_rate": 7.043358778625953e-05, "loss": 1.8687, "step": 4000 }, { "epoch": 5.75, "eval_loss": 0.6252322196960449, "eval_runtime": 56.9578, "eval_samples_per_second": 8.006, "eval_steps_per_second": 1.001, "eval_wer": 0.5430383569918453, "step": 4000 }, { "epoch": 5.9, "learning_rate": 7.020458015267175e-05, "loss": 1.8377, "step": 4100 }, { "epoch": 6.04, "learning_rate": 6.997557251908396e-05, "loss": 1.8436, "step": 4200 }, { "epoch": 6.19, "learning_rate": 6.974656488549617e-05, "loss": 1.7895, "step": 4300 }, { "epoch": 6.33, "learning_rate": 6.951755725190839e-05, "loss": 1.8045, "step": 4400 }, { "epoch": 6.47, "learning_rate": 6.928854961832061e-05, "loss": 1.8186, "step": 4500 }, { "epoch": 6.47, "eval_loss": 0.5491229891777039, "eval_runtime": 56.8943, "eval_samples_per_second": 8.015, "eval_steps_per_second": 1.002, "eval_wer": 0.49879190576864996, "step": 4500 }, { "epoch": 6.62, "learning_rate": 6.905954198473281e-05, "loss": 1.79, "step": 4600 }, { "epoch": 6.76, "learning_rate": 6.883053435114503e-05, "loss": 1.8018, "step": 4700 }, { "epoch": 6.91, "learning_rate": 6.860152671755724e-05, "loss": 1.7966, "step": 4800 }, { "epoch": 7.05, "learning_rate": 6.837251908396947e-05, "loss": 1.7979, "step": 4900 }, { "epoch": 7.19, "learning_rate": 6.814351145038167e-05, "loss": 1.7364, "step": 5000 }, { "epoch": 7.19, "eval_loss": 0.5463064908981323, "eval_runtime": 56.8201, "eval_samples_per_second": 8.025, "eval_steps_per_second": 1.003, "eval_wer": 0.4959226819691936, "step": 5000 }, { "epoch": 7.34, "learning_rate": 6.791450381679388e-05, "loss": 1.7467, "step": 5100 }, { "epoch": 7.48, "learning_rate": 6.76854961832061e-05, "loss": 1.7082, "step": 5200 }, { "epoch": 7.63, "learning_rate": 6.745648854961832e-05, "loss": 1.7084, "step": 5300 }, { "epoch": 7.77, "learning_rate": 6.72297709923664e-05, "loss": 1.6732, "step": 5400 }, { "epoch": 7.91, "learning_rate": 6.700076335877863e-05, "loss": 1.6809, "step": 5500 }, { "epoch": 7.91, "eval_loss": 0.47242555022239685, "eval_runtime": 56.3595, "eval_samples_per_second": 8.091, "eval_steps_per_second": 1.011, "eval_wer": 0.4483539716097856, "step": 5500 }, { "epoch": 8.06, "learning_rate": 6.677175572519083e-05, "loss": 1.7139, "step": 5600 }, { "epoch": 8.2, "learning_rate": 6.654274809160304e-05, "loss": 1.708, "step": 5700 }, { "epoch": 8.34, "learning_rate": 6.631374045801526e-05, "loss": 1.6987, "step": 5800 }, { "epoch": 8.49, "learning_rate": 6.608473282442748e-05, "loss": 1.6705, "step": 5900 }, { "epoch": 8.63, "learning_rate": 6.585572519083968e-05, "loss": 1.641, "step": 6000 }, { "epoch": 8.63, "eval_loss": 0.46793004870414734, "eval_runtime": 56.3201, "eval_samples_per_second": 8.097, "eval_steps_per_second": 1.012, "eval_wer": 0.44608879492600423, "step": 6000 }, { "epoch": 8.78, "learning_rate": 6.56267175572519e-05, "loss": 1.6277, "step": 6100 }, { "epoch": 8.92, "learning_rate": 6.539770992366411e-05, "loss": 1.6529, "step": 6200 }, { "epoch": 9.06, "learning_rate": 6.516870229007634e-05, "loss": 1.6195, "step": 6300 }, { "epoch": 9.21, "learning_rate": 6.493969465648855e-05, "loss": 1.5983, "step": 6400 }, { "epoch": 9.35, "learning_rate": 6.471068702290075e-05, "loss": 1.572, "step": 6500 }, { "epoch": 9.35, "eval_loss": 0.4386586844921112, "eval_runtime": 56.3246, "eval_samples_per_second": 8.096, "eval_steps_per_second": 1.012, "eval_wer": 0.42358803986710963, "step": 6500 }, { "epoch": 9.5, "learning_rate": 6.448167938931296e-05, "loss": 1.5749, "step": 6600 }, { "epoch": 9.64, "learning_rate": 6.425267175572519e-05, "loss": 1.5561, "step": 6700 }, { "epoch": 9.78, "learning_rate": 6.40236641221374e-05, "loss": 1.559, "step": 6800 }, { "epoch": 9.93, "learning_rate": 6.379465648854962e-05, "loss": 1.5419, "step": 6900 }, { "epoch": 10.07, "learning_rate": 6.356564885496182e-05, "loss": 1.5256, "step": 7000 }, { "epoch": 10.07, "eval_loss": 0.39700397849082947, "eval_runtime": 57.819, "eval_samples_per_second": 7.887, "eval_steps_per_second": 0.986, "eval_wer": 0.4003322259136213, "step": 7000 }, { "epoch": 10.22, "learning_rate": 6.333664122137404e-05, "loss": 1.5122, "step": 7100 }, { "epoch": 10.36, "learning_rate": 6.310763358778626e-05, "loss": 1.4812, "step": 7200 }, { "epoch": 10.5, "learning_rate": 6.287862595419847e-05, "loss": 1.4997, "step": 7300 }, { "epoch": 10.65, "learning_rate": 6.264961832061068e-05, "loss": 1.4933, "step": 7400 }, { "epoch": 10.79, "learning_rate": 6.24206106870229e-05, "loss": 1.5044, "step": 7500 }, { "epoch": 10.79, "eval_loss": 0.3689935803413391, "eval_runtime": 56.6639, "eval_samples_per_second": 8.047, "eval_steps_per_second": 1.006, "eval_wer": 0.3893083660525521, "step": 7500 }, { "epoch": 10.93, "learning_rate": 6.219160305343511e-05, "loss": 1.475, "step": 7600 }, { "epoch": 11.08, "learning_rate": 6.196259541984733e-05, "loss": 1.4826, "step": 7700 }, { "epoch": 11.22, "learning_rate": 6.173358778625954e-05, "loss": 1.4336, "step": 7800 }, { "epoch": 11.37, "learning_rate": 6.150458015267175e-05, "loss": 1.4338, "step": 7900 }, { "epoch": 11.51, "learning_rate": 6.127557251908397e-05, "loss": 1.4563, "step": 8000 }, { "epoch": 11.51, "eval_loss": 0.37522608041763306, "eval_runtime": 56.3933, "eval_samples_per_second": 8.086, "eval_steps_per_second": 1.011, "eval_wer": 0.387496224705527, "step": 8000 }, { "epoch": 11.65, "learning_rate": 6.104656488549618e-05, "loss": 1.45, "step": 8100 }, { "epoch": 11.8, "learning_rate": 6.0817557251908386e-05, "loss": 1.4476, "step": 8200 }, { "epoch": 11.94, "learning_rate": 6.0588549618320606e-05, "loss": 1.4255, "step": 8300 }, { "epoch": 12.09, "learning_rate": 6.035954198473282e-05, "loss": 1.4266, "step": 8400 }, { "epoch": 12.23, "learning_rate": 6.013053435114503e-05, "loss": 1.394, "step": 8500 }, { "epoch": 12.23, "eval_loss": 0.3385707437992096, "eval_runtime": 56.4365, "eval_samples_per_second": 8.08, "eval_steps_per_second": 1.01, "eval_wer": 0.35668982180610087, "step": 8500 }, { "epoch": 12.37, "learning_rate": 5.9901526717557246e-05, "loss": 1.4002, "step": 8600 }, { "epoch": 12.52, "learning_rate": 5.9672519083969466e-05, "loss": 1.3823, "step": 8700 }, { "epoch": 12.66, "learning_rate": 5.944351145038167e-05, "loss": 1.3834, "step": 8800 }, { "epoch": 12.8, "learning_rate": 5.9214503816793886e-05, "loss": 1.3765, "step": 8900 }, { "epoch": 12.95, "learning_rate": 5.89854961832061e-05, "loss": 1.3641, "step": 9000 }, { "epoch": 12.95, "eval_loss": 0.3289976716041565, "eval_runtime": 57.6648, "eval_samples_per_second": 7.908, "eval_steps_per_second": 0.988, "eval_wer": 0.346723044397463, "step": 9000 }, { "epoch": 13.09, "learning_rate": 5.875648854961832e-05, "loss": 1.3599, "step": 9100 }, { "epoch": 13.24, "learning_rate": 5.8527480916030534e-05, "loss": 1.3234, "step": 9200 }, { "epoch": 13.38, "learning_rate": 5.829847328244274e-05, "loss": 1.3323, "step": 9300 }, { "epoch": 13.52, "learning_rate": 5.8069465648854954e-05, "loss": 1.3002, "step": 9400 }, { "epoch": 13.67, "learning_rate": 5.7840458015267174e-05, "loss": 1.2878, "step": 9500 }, { "epoch": 13.67, "eval_loss": 0.28934037685394287, "eval_runtime": 56.1939, "eval_samples_per_second": 8.115, "eval_steps_per_second": 1.014, "eval_wer": 0.31350045303533675, "step": 9500 }, { "epoch": 13.81, "learning_rate": 5.761145038167939e-05, "loss": 1.2958, "step": 9600 }, { "epoch": 13.96, "learning_rate": 5.73824427480916e-05, "loss": 1.293, "step": 9700 }, { "epoch": 14.1, "learning_rate": 5.715343511450381e-05, "loss": 1.286, "step": 9800 }, { "epoch": 14.24, "learning_rate": 5.692442748091603e-05, "loss": 1.2912, "step": 9900 }, { "epoch": 14.39, "learning_rate": 5.669541984732824e-05, "loss": 1.2602, "step": 10000 }, { "epoch": 14.39, "eval_loss": 0.2723310589790344, "eval_runtime": 57.7364, "eval_samples_per_second": 7.898, "eval_steps_per_second": 0.987, "eval_wer": 0.30292962851102384, "step": 10000 }, { "epoch": 14.53, "learning_rate": 5.6466412213740455e-05, "loss": 1.2386, "step": 10100 }, { "epoch": 14.68, "learning_rate": 5.623740458015266e-05, "loss": 1.247, "step": 10200 }, { "epoch": 14.82, "learning_rate": 5.600839694656488e-05, "loss": 1.2611, "step": 10300 }, { "epoch": 14.96, "learning_rate": 5.5779389312977095e-05, "loss": 1.2238, "step": 10400 }, { "epoch": 15.11, "learning_rate": 5.555038167938931e-05, "loss": 1.2302, "step": 10500 }, { "epoch": 15.11, "eval_loss": 0.260337769985199, "eval_runtime": 56.8447, "eval_samples_per_second": 8.022, "eval_steps_per_second": 1.003, "eval_wer": 0.29885231048021743, "step": 10500 }, { "epoch": 15.25, "learning_rate": 5.532137404580152e-05, "loss": 1.2119, "step": 10600 }, { "epoch": 15.4, "learning_rate": 5.509236641221373e-05, "loss": 1.205, "step": 10700 }, { "epoch": 15.54, "learning_rate": 5.486335877862595e-05, "loss": 1.1929, "step": 10800 }, { "epoch": 15.68, "learning_rate": 5.463435114503816e-05, "loss": 1.1917, "step": 10900 }, { "epoch": 15.83, "learning_rate": 5.4405343511450376e-05, "loss": 1.1865, "step": 11000 }, { "epoch": 15.83, "eval_loss": 0.24401792883872986, "eval_runtime": 56.3786, "eval_samples_per_second": 8.088, "eval_steps_per_second": 1.011, "eval_wer": 0.279371790999698, "step": 11000 }, { "epoch": 15.97, "learning_rate": 5.417633587786259e-05, "loss": 1.1634, "step": 11100 }, { "epoch": 16.11, "learning_rate": 5.394732824427481e-05, "loss": 1.162, "step": 11200 }, { "epoch": 16.26, "learning_rate": 5.3718320610687016e-05, "loss": 1.1556, "step": 11300 }, { "epoch": 16.4, "learning_rate": 5.348931297709923e-05, "loss": 1.1432, "step": 11400 }, { "epoch": 16.55, "learning_rate": 5.326030534351144e-05, "loss": 1.1491, "step": 11500 }, { "epoch": 16.55, "eval_loss": 0.24998041987419128, "eval_runtime": 57.3291, "eval_samples_per_second": 7.954, "eval_steps_per_second": 0.994, "eval_wer": 0.278767743884023, "step": 11500 }, { "epoch": 16.69, "learning_rate": 5.303129770992366e-05, "loss": 1.1351, "step": 11600 }, { "epoch": 16.83, "learning_rate": 5.280458015267176e-05, "loss": 1.1448, "step": 11700 }, { "epoch": 16.98, "learning_rate": 5.2575572519083964e-05, "loss": 1.123, "step": 11800 }, { "epoch": 17.12, "learning_rate": 5.234656488549618e-05, "loss": 1.1066, "step": 11900 }, { "epoch": 17.27, "learning_rate": 5.211755725190839e-05, "loss": 1.093, "step": 12000 }, { "epoch": 17.27, "eval_loss": 0.22785192728042603, "eval_runtime": 57.123, "eval_samples_per_second": 7.983, "eval_steps_per_second": 0.998, "eval_wer": 0.2629115070975536, "step": 12000 }, { "epoch": 17.41, "learning_rate": 5.188854961832061e-05, "loss": 1.115, "step": 12100 }, { "epoch": 17.55, "learning_rate": 5.1659541984732825e-05, "loss": 1.0895, "step": 12200 }, { "epoch": 17.7, "learning_rate": 5.143053435114503e-05, "loss": 1.0705, "step": 12300 }, { "epoch": 17.84, "learning_rate": 5.1201526717557245e-05, "loss": 1.07, "step": 12400 }, { "epoch": 17.98, "learning_rate": 5.0972519083969465e-05, "loss": 1.0367, "step": 12500 }, { "epoch": 17.98, "eval_loss": 0.2076062262058258, "eval_runtime": 56.1861, "eval_samples_per_second": 8.116, "eval_steps_per_second": 1.014, "eval_wer": 0.24433705829054667, "step": 12500 }, { "epoch": 18.13, "learning_rate": 5.074351145038168e-05, "loss": 1.0313, "step": 12600 }, { "epoch": 18.27, "learning_rate": 5.051450381679389e-05, "loss": 1.0236, "step": 12700 }, { "epoch": 18.42, "learning_rate": 5.02854961832061e-05, "loss": 1.0035, "step": 12800 }, { "epoch": 18.56, "learning_rate": 5.005648854961831e-05, "loss": 1.0105, "step": 12900 }, { "epoch": 18.7, "learning_rate": 4.982748091603053e-05, "loss": 0.9954, "step": 13000 }, { "epoch": 18.7, "eval_loss": 0.18436060845851898, "eval_runtime": 58.6813, "eval_samples_per_second": 7.771, "eval_steps_per_second": 0.971, "eval_wer": 0.22591362126245848, "step": 13000 }, { "epoch": 18.85, "learning_rate": 4.9598473282442746e-05, "loss": 0.9883, "step": 13100 }, { "epoch": 18.99, "learning_rate": 4.936946564885495e-05, "loss": 0.9908, "step": 13200 }, { "epoch": 19.14, "learning_rate": 4.9140458015267166e-05, "loss": 0.9634, "step": 13300 }, { "epoch": 19.28, "learning_rate": 4.8911450381679386e-05, "loss": 0.9628, "step": 13400 }, { "epoch": 19.42, "learning_rate": 4.86824427480916e-05, "loss": 0.99, "step": 13500 }, { "epoch": 19.42, "eval_loss": 0.17937178909778595, "eval_runtime": 57.9354, "eval_samples_per_second": 7.871, "eval_steps_per_second": 0.984, "eval_wer": 0.21790999697976443, "step": 13500 }, { "epoch": 19.57, "learning_rate": 4.845343511450381e-05, "loss": 0.9627, "step": 13600 }, { "epoch": 19.71, "learning_rate": 4.822442748091602e-05, "loss": 0.969, "step": 13700 }, { "epoch": 19.86, "learning_rate": 4.799541984732824e-05, "loss": 0.9507, "step": 13800 }, { "epoch": 20.0, "learning_rate": 4.776641221374045e-05, "loss": 0.9696, "step": 13900 }, { "epoch": 20.14, "learning_rate": 4.753740458015267e-05, "loss": 0.9385, "step": 14000 }, { "epoch": 20.14, "eval_loss": 0.1765013188123703, "eval_runtime": 56.2969, "eval_samples_per_second": 8.1, "eval_steps_per_second": 1.012, "eval_wer": 0.2121715493808517, "step": 14000 }, { "epoch": 20.29, "learning_rate": 4.730839694656488e-05, "loss": 0.9264, "step": 14100 }, { "epoch": 20.43, "learning_rate": 4.70793893129771e-05, "loss": 0.9399, "step": 14200 }, { "epoch": 20.57, "learning_rate": 4.685038167938931e-05, "loss": 0.923, "step": 14300 }, { "epoch": 20.72, "learning_rate": 4.662137404580152e-05, "loss": 0.9128, "step": 14400 }, { "epoch": 20.86, "learning_rate": 4.6392366412213734e-05, "loss": 0.8952, "step": 14500 }, { "epoch": 20.86, "eval_loss": 0.17056496441364288, "eval_runtime": 56.9013, "eval_samples_per_second": 8.014, "eval_steps_per_second": 1.002, "eval_wer": 0.19737239504681364, "step": 14500 }, { "epoch": 21.01, "learning_rate": 4.6163358778625954e-05, "loss": 0.9509, "step": 14600 }, { "epoch": 21.15, "learning_rate": 4.593435114503817e-05, "loss": 0.8967, "step": 14700 }, { "epoch": 21.29, "learning_rate": 4.5705343511450374e-05, "loss": 0.8928, "step": 14800 }, { "epoch": 21.44, "learning_rate": 4.547633587786259e-05, "loss": 0.8731, "step": 14900 }, { "epoch": 21.58, "learning_rate": 4.524732824427481e-05, "loss": 0.8841, "step": 15000 }, { "epoch": 21.58, "eval_loss": 0.17911894619464874, "eval_runtime": 56.4012, "eval_samples_per_second": 8.085, "eval_steps_per_second": 1.011, "eval_wer": 0.1969193597100574, "step": 15000 }, { "epoch": 21.73, "learning_rate": 4.501832061068702e-05, "loss": 0.8816, "step": 15100 }, { "epoch": 21.87, "learning_rate": 4.4789312977099235e-05, "loss": 0.8801, "step": 15200 }, { "epoch": 22.01, "learning_rate": 4.456030534351144e-05, "loss": 0.9044, "step": 15300 }, { "epoch": 22.16, "learning_rate": 4.433129770992366e-05, "loss": 0.8849, "step": 15400 }, { "epoch": 22.3, "learning_rate": 4.4102290076335875e-05, "loss": 0.847, "step": 15500 }, { "epoch": 22.3, "eval_loss": 0.17799803614616394, "eval_runtime": 57.0716, "eval_samples_per_second": 7.99, "eval_steps_per_second": 0.999, "eval_wer": 0.2059800664451827, "step": 15500 }, { "epoch": 22.45, "learning_rate": 4.387328244274809e-05, "loss": 0.8805, "step": 15600 }, { "epoch": 22.59, "learning_rate": 4.3644274809160295e-05, "loss": 0.8516, "step": 15700 }, { "epoch": 22.73, "learning_rate": 4.341984732824427e-05, "loss": 0.8137, "step": 15800 }, { "epoch": 22.88, "learning_rate": 4.3190839694656484e-05, "loss": 0.8335, "step": 15900 }, { "epoch": 23.02, "learning_rate": 4.29618320610687e-05, "loss": 0.8669, "step": 16000 }, { "epoch": 23.02, "eval_loss": 0.16084039211273193, "eval_runtime": 58.1201, "eval_samples_per_second": 7.846, "eval_steps_per_second": 0.981, "eval_wer": 0.18619752340682574, "step": 16000 }, { "epoch": 23.17, "learning_rate": 4.273282442748092e-05, "loss": 0.8242, "step": 16100 }, { "epoch": 23.31, "learning_rate": 4.250381679389313e-05, "loss": 0.8384, "step": 16200 }, { "epoch": 23.45, "learning_rate": 4.227480916030534e-05, "loss": 0.8376, "step": 16300 }, { "epoch": 23.6, "learning_rate": 4.204809160305343e-05, "loss": 0.8277, "step": 16400 }, { "epoch": 23.74, "learning_rate": 4.1819083969465645e-05, "loss": 0.8066, "step": 16500 }, { "epoch": 23.74, "eval_loss": 0.14472883939743042, "eval_runtime": 58.2049, "eval_samples_per_second": 7.834, "eval_steps_per_second": 0.979, "eval_wer": 0.16263968589549985, "step": 16500 }, { "epoch": 23.88, "learning_rate": 4.159007633587786e-05, "loss": 0.8056, "step": 16600 }, { "epoch": 24.03, "learning_rate": 4.136106870229008e-05, "loss": 0.8128, "step": 16700 }, { "epoch": 24.17, "learning_rate": 4.1132061068702285e-05, "loss": 0.8095, "step": 16800 }, { "epoch": 24.32, "learning_rate": 4.09030534351145e-05, "loss": 0.7816, "step": 16900 }, { "epoch": 24.46, "learning_rate": 4.067404580152671e-05, "loss": 0.7908, "step": 17000 }, { "epoch": 24.46, "eval_loss": 0.14570841193199158, "eval_runtime": 56.8345, "eval_samples_per_second": 8.023, "eval_steps_per_second": 1.003, "eval_wer": 0.1655089096949562, "step": 17000 }, { "epoch": 24.6, "learning_rate": 4.044503816793893e-05, "loss": 0.7846, "step": 17100 }, { "epoch": 24.75, "learning_rate": 4.0216030534351146e-05, "loss": 0.7735, "step": 17200 }, { "epoch": 24.89, "learning_rate": 3.998702290076335e-05, "loss": 0.772, "step": 17300 }, { "epoch": 25.04, "learning_rate": 3.9758015267175566e-05, "loss": 0.7676, "step": 17400 }, { "epoch": 25.18, "learning_rate": 3.9529007633587786e-05, "loss": 0.7459, "step": 17500 }, { "epoch": 25.18, "eval_loss": 0.13501976430416107, "eval_runtime": 56.9549, "eval_samples_per_second": 8.006, "eval_steps_per_second": 1.001, "eval_wer": 0.14451827242524917, "step": 17500 }, { "epoch": 25.32, "learning_rate": 3.93e-05, "loss": 0.7394, "step": 17600 }, { "epoch": 25.47, "learning_rate": 3.9070992366412206e-05, "loss": 0.727, "step": 17700 }, { "epoch": 25.61, "learning_rate": 3.884198473282442e-05, "loss": 0.7255, "step": 17800 }, { "epoch": 25.75, "learning_rate": 3.861297709923664e-05, "loss": 0.7122, "step": 17900 }, { "epoch": 25.9, "learning_rate": 3.8383969465648854e-05, "loss": 0.7218, "step": 18000 }, { "epoch": 25.9, "eval_loss": 0.12762019038200378, "eval_runtime": 56.702, "eval_samples_per_second": 8.042, "eval_steps_per_second": 1.005, "eval_wer": 0.1421020839625491, "step": 18000 }, { "epoch": 26.04, "learning_rate": 3.815496183206107e-05, "loss": 0.7219, "step": 18100 }, { "epoch": 26.19, "learning_rate": 3.7925954198473274e-05, "loss": 0.6954, "step": 18200 }, { "epoch": 26.33, "learning_rate": 3.7696946564885494e-05, "loss": 0.6874, "step": 18300 }, { "epoch": 26.47, "learning_rate": 3.746793893129771e-05, "loss": 0.6974, "step": 18400 }, { "epoch": 26.62, "learning_rate": 3.7241221374045795e-05, "loss": 0.703, "step": 18500 }, { "epoch": 26.62, "eval_loss": 0.11774259060621262, "eval_runtime": 58.7528, "eval_samples_per_second": 7.761, "eval_steps_per_second": 0.97, "eval_wer": 0.13017215342796737, "step": 18500 }, { "epoch": 26.76, "learning_rate": 3.7012213740458015e-05, "loss": 0.6976, "step": 18600 }, { "epoch": 26.91, "learning_rate": 3.678320610687022e-05, "loss": 0.7008, "step": 18700 }, { "epoch": 27.05, "learning_rate": 3.655419847328244e-05, "loss": 0.6815, "step": 18800 }, { "epoch": 27.19, "learning_rate": 3.6325190839694655e-05, "loss": 0.6783, "step": 18900 }, { "epoch": 27.34, "learning_rate": 3.609618320610687e-05, "loss": 0.685, "step": 19000 }, { "epoch": 27.34, "eval_loss": 0.11473936587572098, "eval_runtime": 56.5545, "eval_samples_per_second": 8.063, "eval_steps_per_second": 1.008, "eval_wer": 0.13047417698580488, "step": 19000 }, { "epoch": 27.48, "learning_rate": 3.586717557251908e-05, "loss": 0.6828, "step": 19100 }, { "epoch": 27.63, "learning_rate": 3.5638167938931296e-05, "loss": 0.6899, "step": 19200 }, { "epoch": 27.77, "learning_rate": 3.540916030534351e-05, "loss": 0.6889, "step": 19300 }, { "epoch": 27.91, "learning_rate": 3.518015267175572e-05, "loss": 0.689, "step": 19400 }, { "epoch": 28.06, "learning_rate": 3.4951145038167936e-05, "loss": 0.6811, "step": 19500 }, { "epoch": 28.06, "eval_loss": 0.11280036717653275, "eval_runtime": 57.3287, "eval_samples_per_second": 7.954, "eval_steps_per_second": 0.994, "eval_wer": 0.12443370582905466, "step": 19500 }, { "epoch": 28.2, "learning_rate": 3.472213740458015e-05, "loss": 0.6599, "step": 19600 }, { "epoch": 28.34, "learning_rate": 3.449312977099236e-05, "loss": 0.655, "step": 19700 }, { "epoch": 28.49, "learning_rate": 3.4264122137404576e-05, "loss": 0.6577, "step": 19800 }, { "epoch": 28.63, "learning_rate": 3.403511450381679e-05, "loss": 0.6562, "step": 19900 }, { "epoch": 28.78, "learning_rate": 3.3806106870229e-05, "loss": 0.6444, "step": 20000 }, { "epoch": 28.78, "eval_loss": 0.11200590431690216, "eval_runtime": 56.8504, "eval_samples_per_second": 8.021, "eval_steps_per_second": 1.003, "eval_wer": 0.1212624584717608, "step": 20000 }, { "epoch": 28.92, "learning_rate": 3.357709923664122e-05, "loss": 0.6543, "step": 20100 }, { "epoch": 29.06, "learning_rate": 3.334809160305344e-05, "loss": 0.6337, "step": 20200 }, { "epoch": 29.21, "learning_rate": 3.3119083969465644e-05, "loss": 0.613, "step": 20300 }, { "epoch": 29.35, "learning_rate": 3.2890076335877864e-05, "loss": 0.6197, "step": 20400 }, { "epoch": 29.5, "learning_rate": 3.266106870229007e-05, "loss": 0.6323, "step": 20500 }, { "epoch": 29.5, "eval_loss": 0.11374162137508392, "eval_runtime": 56.3556, "eval_samples_per_second": 8.091, "eval_steps_per_second": 1.011, "eval_wer": 0.11658109332527937, "step": 20500 }, { "epoch": 29.64, "learning_rate": 3.243206106870229e-05, "loss": 0.6182, "step": 20600 }, { "epoch": 29.78, "learning_rate": 3.2203053435114504e-05, "loss": 0.6298, "step": 20700 }, { "epoch": 29.93, "learning_rate": 3.197404580152672e-05, "loss": 0.6143, "step": 20800 }, { "epoch": 30.07, "learning_rate": 3.174503816793893e-05, "loss": 0.5943, "step": 20900 }, { "epoch": 30.22, "learning_rate": 3.1516030534351145e-05, "loss": 0.5998, "step": 21000 }, { "epoch": 30.22, "eval_loss": 0.10512539744377136, "eval_runtime": 56.712, "eval_samples_per_second": 8.041, "eval_steps_per_second": 1.005, "eval_wer": 0.1106916339474479, "step": 21000 }, { "epoch": 30.36, "learning_rate": 3.128702290076336e-05, "loss": 0.6093, "step": 21100 }, { "epoch": 30.5, "learning_rate": 3.1058015267175565e-05, "loss": 0.5948, "step": 21200 }, { "epoch": 30.65, "learning_rate": 3.0829007633587785e-05, "loss": 0.5847, "step": 21300 }, { "epoch": 30.79, "learning_rate": 3.06e-05, "loss": 0.5768, "step": 21400 }, { "epoch": 30.93, "learning_rate": 3.0370992366412212e-05, "loss": 0.5706, "step": 21500 }, { "epoch": 30.93, "eval_loss": 0.1035308688879013, "eval_runtime": 56.4523, "eval_samples_per_second": 8.078, "eval_steps_per_second": 1.01, "eval_wer": 0.10374509211718513, "step": 21500 }, { "epoch": 31.08, "learning_rate": 3.0141984732824422e-05, "loss": 0.5819, "step": 21600 }, { "epoch": 31.22, "learning_rate": 2.991297709923664e-05, "loss": 0.5529, "step": 21700 }, { "epoch": 31.37, "learning_rate": 2.9683969465648852e-05, "loss": 0.5691, "step": 21800 }, { "epoch": 31.51, "learning_rate": 2.9454961832061066e-05, "loss": 0.5622, "step": 21900 }, { "epoch": 31.65, "learning_rate": 2.922595419847328e-05, "loss": 0.5555, "step": 22000 }, { "epoch": 31.65, "eval_loss": 0.10311654210090637, "eval_runtime": 56.2781, "eval_samples_per_second": 8.103, "eval_steps_per_second": 1.013, "eval_wer": 0.09272123225611598, "step": 22000 }, { "epoch": 31.8, "learning_rate": 2.8996946564885493e-05, "loss": 0.5697, "step": 22100 }, { "epoch": 31.94, "learning_rate": 2.8767938931297706e-05, "loss": 0.5567, "step": 22200 }, { "epoch": 32.09, "learning_rate": 2.8538931297709923e-05, "loss": 0.5562, "step": 22300 }, { "epoch": 32.23, "learning_rate": 2.8309923664122133e-05, "loss": 0.5482, "step": 22400 }, { "epoch": 32.37, "learning_rate": 2.808091603053435e-05, "loss": 0.5389, "step": 22500 }, { "epoch": 32.37, "eval_loss": 0.09973499178886414, "eval_runtime": 56.482, "eval_samples_per_second": 8.073, "eval_steps_per_second": 1.009, "eval_wer": 0.09000302023557838, "step": 22500 }, { "epoch": 32.52, "learning_rate": 2.785419847328244e-05, "loss": 0.5398, "step": 22600 }, { "epoch": 32.66, "learning_rate": 2.7625190839694654e-05, "loss": 0.5284, "step": 22700 }, { "epoch": 32.8, "learning_rate": 2.739618320610687e-05, "loss": 0.5292, "step": 22800 }, { "epoch": 32.95, "learning_rate": 2.716717557251908e-05, "loss": 0.5186, "step": 22900 }, { "epoch": 33.09, "learning_rate": 2.6938167938931298e-05, "loss": 0.5201, "step": 23000 }, { "epoch": 33.09, "eval_loss": 0.09196200966835022, "eval_runtime": 56.8421, "eval_samples_per_second": 8.022, "eval_steps_per_second": 1.003, "eval_wer": 0.09121111446692842, "step": 23000 }, { "epoch": 33.24, "learning_rate": 2.6709160305343508e-05, "loss": 0.5205, "step": 23100 }, { "epoch": 33.38, "learning_rate": 2.648015267175572e-05, "loss": 0.5247, "step": 23200 }, { "epoch": 33.52, "learning_rate": 2.6251145038167938e-05, "loss": 0.5156, "step": 23300 }, { "epoch": 33.67, "learning_rate": 2.6022137404580148e-05, "loss": 0.5125, "step": 23400 }, { "epoch": 33.81, "learning_rate": 2.5793129770992365e-05, "loss": 0.5146, "step": 23500 }, { "epoch": 33.81, "eval_loss": 0.09293542802333832, "eval_runtime": 57.2076, "eval_samples_per_second": 7.971, "eval_steps_per_second": 0.996, "eval_wer": 0.0946843853820598, "step": 23500 }, { "epoch": 33.96, "learning_rate": 2.5564122137404575e-05, "loss": 0.5085, "step": 23600 }, { "epoch": 34.1, "learning_rate": 2.5335114503816792e-05, "loss": 0.5173, "step": 23700 }, { "epoch": 34.24, "learning_rate": 2.5106106870229002e-05, "loss": 0.5101, "step": 23800 }, { "epoch": 34.39, "learning_rate": 2.487709923664122e-05, "loss": 0.5056, "step": 23900 }, { "epoch": 34.53, "learning_rate": 2.4648091603053432e-05, "loss": 0.515, "step": 24000 }, { "epoch": 34.53, "eval_loss": 0.10004792362451553, "eval_runtime": 56.2386, "eval_samples_per_second": 8.108, "eval_steps_per_second": 1.014, "eval_wer": 0.09528843249773482, "step": 24000 }, { "epoch": 34.68, "learning_rate": 2.4419083969465646e-05, "loss": 0.5211, "step": 24100 }, { "epoch": 34.82, "learning_rate": 2.419007633587786e-05, "loss": 0.5065, "step": 24200 }, { "epoch": 34.96, "learning_rate": 2.3961068702290076e-05, "loss": 0.4992, "step": 24300 }, { "epoch": 35.11, "learning_rate": 2.3732061068702286e-05, "loss": 0.495, "step": 24400 }, { "epoch": 35.25, "learning_rate": 2.3503053435114503e-05, "loss": 0.4743, "step": 24500 }, { "epoch": 35.25, "eval_loss": 0.09216006845235825, "eval_runtime": 56.5494, "eval_samples_per_second": 8.064, "eval_steps_per_second": 1.008, "eval_wer": 0.0892479613409846, "step": 24500 }, { "epoch": 35.4, "learning_rate": 2.3274045801526713e-05, "loss": 0.4769, "step": 24600 }, { "epoch": 35.54, "learning_rate": 2.304503816793893e-05, "loss": 0.4774, "step": 24700 }, { "epoch": 35.68, "learning_rate": 2.28206106870229e-05, "loss": 0.487, "step": 24800 }, { "epoch": 35.83, "learning_rate": 2.259160305343511e-05, "loss": 0.4797, "step": 24900 }, { "epoch": 35.97, "learning_rate": 2.2362595419847328e-05, "loss": 0.4707, "step": 25000 }, { "epoch": 35.97, "eval_loss": 0.08524981886148453, "eval_runtime": 56.8651, "eval_samples_per_second": 8.019, "eval_steps_per_second": 1.002, "eval_wer": 0.08079130172153429, "step": 25000 }, { "epoch": 36.11, "learning_rate": 2.2133587786259538e-05, "loss": 0.4668, "step": 25100 }, { "epoch": 36.26, "learning_rate": 2.1904580152671755e-05, "loss": 0.476, "step": 25200 }, { "epoch": 36.4, "learning_rate": 2.167557251908397e-05, "loss": 0.4609, "step": 25300 }, { "epoch": 36.55, "learning_rate": 2.1446564885496182e-05, "loss": 0.4605, "step": 25400 }, { "epoch": 36.69, "learning_rate": 2.1217557251908395e-05, "loss": 0.4456, "step": 25500 }, { "epoch": 36.69, "eval_loss": 0.08548382669687271, "eval_runtime": 56.4053, "eval_samples_per_second": 8.084, "eval_steps_per_second": 1.011, "eval_wer": 0.07792207792207792, "step": 25500 }, { "epoch": 36.83, "learning_rate": 2.098854961832061e-05, "loss": 0.4515, "step": 25600 }, { "epoch": 36.98, "learning_rate": 2.0759541984732822e-05, "loss": 0.4426, "step": 25700 }, { "epoch": 37.12, "learning_rate": 2.053053435114504e-05, "loss": 0.4491, "step": 25800 }, { "epoch": 37.27, "learning_rate": 2.030152671755725e-05, "loss": 0.4464, "step": 25900 }, { "epoch": 37.41, "learning_rate": 2.0072519083969466e-05, "loss": 0.443, "step": 26000 }, { "epoch": 37.41, "eval_loss": 0.0842847004532814, "eval_runtime": 56.5582, "eval_samples_per_second": 8.062, "eval_steps_per_second": 1.008, "eval_wer": 0.07384475989127152, "step": 26000 }, { "epoch": 37.55, "learning_rate": 1.9843511450381676e-05, "loss": 0.4381, "step": 26100 }, { "epoch": 37.7, "learning_rate": 1.9614503816793893e-05, "loss": 0.4311, "step": 26200 }, { "epoch": 37.84, "learning_rate": 1.9385496183206106e-05, "loss": 0.4413, "step": 26300 }, { "epoch": 37.98, "learning_rate": 1.915648854961832e-05, "loss": 0.4186, "step": 26400 }, { "epoch": 38.13, "learning_rate": 1.8927480916030533e-05, "loss": 0.4388, "step": 26500 }, { "epoch": 38.13, "eval_loss": 0.081607885658741, "eval_runtime": 56.5857, "eval_samples_per_second": 8.059, "eval_steps_per_second": 1.007, "eval_wer": 0.06991845363938387, "step": 26500 }, { "epoch": 38.27, "learning_rate": 1.8698473282442747e-05, "loss": 0.4181, "step": 26600 }, { "epoch": 38.42, "learning_rate": 1.846946564885496e-05, "loss": 0.4338, "step": 26700 }, { "epoch": 38.56, "learning_rate": 1.8240458015267174e-05, "loss": 0.4186, "step": 26800 }, { "epoch": 38.7, "learning_rate": 1.8011450381679387e-05, "loss": 0.4169, "step": 26900 }, { "epoch": 38.85, "learning_rate": 1.77824427480916e-05, "loss": 0.4162, "step": 27000 }, { "epoch": 38.85, "eval_loss": 0.07516241818666458, "eval_runtime": 56.61, "eval_samples_per_second": 8.055, "eval_steps_per_second": 1.007, "eval_wer": 0.06448202959830866, "step": 27000 }, { "epoch": 38.99, "learning_rate": 1.7553435114503814e-05, "loss": 0.4076, "step": 27100 }, { "epoch": 39.14, "learning_rate": 1.7324427480916027e-05, "loss": 0.4073, "step": 27200 }, { "epoch": 39.28, "learning_rate": 1.7095419847328244e-05, "loss": 0.4042, "step": 27300 }, { "epoch": 39.42, "learning_rate": 1.6866412213740458e-05, "loss": 0.4035, "step": 27400 }, { "epoch": 39.57, "learning_rate": 1.663740458015267e-05, "loss": 0.3979, "step": 27500 }, { "epoch": 39.57, "eval_loss": 0.07611743360757828, "eval_runtime": 56.1996, "eval_samples_per_second": 8.114, "eval_steps_per_second": 1.014, "eval_wer": 0.062065841135608577, "step": 27500 }, { "epoch": 39.71, "learning_rate": 1.6408396946564885e-05, "loss": 0.3923, "step": 27600 }, { "epoch": 39.86, "learning_rate": 1.6179389312977098e-05, "loss": 0.395, "step": 27700 }, { "epoch": 40.0, "learning_rate": 1.595038167938931e-05, "loss": 0.3931, "step": 27800 }, { "epoch": 40.14, "learning_rate": 1.5721374045801525e-05, "loss": 0.3969, "step": 27900 }, { "epoch": 40.29, "learning_rate": 1.549236641221374e-05, "loss": 0.3889, "step": 28000 }, { "epoch": 40.29, "eval_loss": 0.07714105397462845, "eval_runtime": 56.3655, "eval_samples_per_second": 8.09, "eval_steps_per_second": 1.011, "eval_wer": 0.06251887647236484, "step": 28000 }, { "epoch": 40.43, "learning_rate": 1.5263358778625952e-05, "loss": 0.387, "step": 28100 }, { "epoch": 40.57, "learning_rate": 1.5034351145038167e-05, "loss": 0.3911, "step": 28200 }, { "epoch": 40.72, "learning_rate": 1.480534351145038e-05, "loss": 0.3865, "step": 28300 }, { "epoch": 40.86, "learning_rate": 1.4576335877862594e-05, "loss": 0.382, "step": 28400 }, { "epoch": 41.01, "learning_rate": 1.4347328244274809e-05, "loss": 0.3923, "step": 28500 }, { "epoch": 41.01, "eval_loss": 0.07554154098033905, "eval_runtime": 57.0575, "eval_samples_per_second": 7.992, "eval_steps_per_second": 0.999, "eval_wer": 0.059800664451827246, "step": 28500 }, { "epoch": 41.15, "learning_rate": 1.4118320610687023e-05, "loss": 0.3723, "step": 28600 }, { "epoch": 41.29, "learning_rate": 1.3889312977099236e-05, "loss": 0.3797, "step": 28700 }, { "epoch": 41.44, "learning_rate": 1.3662595419847327e-05, "loss": 0.3823, "step": 28800 }, { "epoch": 41.58, "learning_rate": 1.343358778625954e-05, "loss": 0.3691, "step": 28900 }, { "epoch": 41.73, "learning_rate": 1.3204580152671754e-05, "loss": 0.3693, "step": 29000 }, { "epoch": 41.73, "eval_loss": 0.07298705726861954, "eval_runtime": 59.4935, "eval_samples_per_second": 7.665, "eval_steps_per_second": 0.958, "eval_wer": 0.05783751132588342, "step": 29000 }, { "epoch": 41.87, "learning_rate": 1.2975572519083969e-05, "loss": 0.3739, "step": 29100 }, { "epoch": 42.01, "learning_rate": 1.2746564885496182e-05, "loss": 0.3797, "step": 29200 }, { "epoch": 42.16, "learning_rate": 1.2517557251908396e-05, "loss": 0.3704, "step": 29300 }, { "epoch": 42.3, "learning_rate": 1.2288549618320609e-05, "loss": 0.3702, "step": 29400 }, { "epoch": 42.45, "learning_rate": 1.2059541984732823e-05, "loss": 0.3642, "step": 29500 }, { "epoch": 42.45, "eval_loss": 0.07388342171907425, "eval_runtime": 56.6481, "eval_samples_per_second": 8.05, "eval_steps_per_second": 1.006, "eval_wer": 0.059800664451827246, "step": 29500 }, { "epoch": 42.59, "learning_rate": 1.1830534351145038e-05, "loss": 0.3665, "step": 29600 }, { "epoch": 42.73, "learning_rate": 1.1601526717557251e-05, "loss": 0.3626, "step": 29700 }, { "epoch": 42.88, "learning_rate": 1.1372519083969465e-05, "loss": 0.3499, "step": 29800 }, { "epoch": 43.02, "learning_rate": 1.1143511450381678e-05, "loss": 0.3632, "step": 29900 }, { "epoch": 43.17, "learning_rate": 1.0914503816793893e-05, "loss": 0.3532, "step": 30000 }, { "epoch": 43.17, "eval_loss": 0.07121992111206055, "eval_runtime": 57.0783, "eval_samples_per_second": 7.989, "eval_steps_per_second": 0.999, "eval_wer": 0.05527031108426457, "step": 30000 }, { "epoch": 43.31, "learning_rate": 1.0685496183206107e-05, "loss": 0.3531, "step": 30100 }, { "epoch": 43.45, "learning_rate": 1.045648854961832e-05, "loss": 0.361, "step": 30200 }, { "epoch": 43.6, "learning_rate": 1.0227480916030534e-05, "loss": 0.3508, "step": 30300 }, { "epoch": 43.74, "learning_rate": 9.998473282442747e-06, "loss": 0.352, "step": 30400 }, { "epoch": 43.88, "learning_rate": 9.769465648854962e-06, "loss": 0.3513, "step": 30500 }, { "epoch": 43.88, "eval_loss": 0.07616806775331497, "eval_runtime": 56.3541, "eval_samples_per_second": 8.092, "eval_steps_per_second": 1.011, "eval_wer": 0.051646028390214434, "step": 30500 }, { "epoch": 44.03, "learning_rate": 9.540458015267176e-06, "loss": 0.3447, "step": 30600 }, { "epoch": 44.17, "learning_rate": 9.311450381679389e-06, "loss": 0.3463, "step": 30700 }, { "epoch": 44.32, "learning_rate": 9.082442748091603e-06, "loss": 0.3373, "step": 30800 }, { "epoch": 44.46, "learning_rate": 8.855725190839693e-06, "loss": 0.3419, "step": 30900 }, { "epoch": 44.6, "learning_rate": 8.626717557251907e-06, "loss": 0.3349, "step": 31000 }, { "epoch": 44.6, "eval_loss": 0.07309506088495255, "eval_runtime": 56.1703, "eval_samples_per_second": 8.118, "eval_steps_per_second": 1.015, "eval_wer": 0.05043793415886439, "step": 31000 }, { "epoch": 44.75, "learning_rate": 8.397709923664122e-06, "loss": 0.3352, "step": 31100 }, { "epoch": 44.89, "learning_rate": 8.168702290076335e-06, "loss": 0.3376, "step": 31200 }, { "epoch": 45.04, "learning_rate": 7.939694656488549e-06, "loss": 0.3361, "step": 31300 }, { "epoch": 45.18, "learning_rate": 7.710687022900762e-06, "loss": 0.3318, "step": 31400 }, { "epoch": 45.32, "learning_rate": 7.4816793893129765e-06, "loss": 0.3305, "step": 31500 }, { "epoch": 45.32, "eval_loss": 0.0724666640162468, "eval_runtime": 58.1784, "eval_samples_per_second": 7.838, "eval_steps_per_second": 0.98, "eval_wer": 0.0507399577167019, "step": 31500 }, { "epoch": 45.47, "learning_rate": 7.25267175572519e-06, "loss": 0.3218, "step": 31600 }, { "epoch": 45.61, "learning_rate": 7.0259541984732824e-06, "loss": 0.3254, "step": 31700 }, { "epoch": 45.75, "learning_rate": 6.799236641221373e-06, "loss": 0.3256, "step": 31800 }, { "epoch": 45.9, "learning_rate": 6.570229007633587e-06, "loss": 0.3198, "step": 31900 }, { "epoch": 46.04, "learning_rate": 6.341221374045801e-06, "loss": 0.3285, "step": 32000 }, { "epoch": 46.04, "eval_loss": 0.07090699672698975, "eval_runtime": 59.4451, "eval_samples_per_second": 7.671, "eval_steps_per_second": 0.959, "eval_wer": 0.04892781636967684, "step": 32000 }, { "epoch": 46.19, "learning_rate": 6.112213740458014e-06, "loss": 0.323, "step": 32100 }, { "epoch": 46.33, "learning_rate": 5.883206106870229e-06, "loss": 0.3175, "step": 32200 }, { "epoch": 46.47, "learning_rate": 5.654198473282442e-06, "loss": 0.3191, "step": 32300 }, { "epoch": 46.62, "learning_rate": 5.425190839694656e-06, "loss": 0.313, "step": 32400 }, { "epoch": 46.76, "learning_rate": 5.19618320610687e-06, "loss": 0.3179, "step": 32500 }, { "epoch": 46.76, "eval_loss": 0.06665544956922531, "eval_runtime": 56.3776, "eval_samples_per_second": 8.088, "eval_steps_per_second": 1.011, "eval_wer": 0.0466626396858955, "step": 32500 }, { "epoch": 46.91, "learning_rate": 4.967175572519083e-06, "loss": 0.316, "step": 32600 }, { "epoch": 47.05, "learning_rate": 4.738167938931298e-06, "loss": 0.3107, "step": 32700 }, { "epoch": 47.19, "learning_rate": 4.509160305343511e-06, "loss": 0.3094, "step": 32800 }, { "epoch": 47.34, "learning_rate": 4.2801526717557246e-06, "loss": 0.3161, "step": 32900 }, { "epoch": 47.48, "learning_rate": 4.051145038167939e-06, "loss": 0.3158, "step": 33000 }, { "epoch": 47.48, "eval_loss": 0.06528327614068985, "eval_runtime": 56.0724, "eval_samples_per_second": 8.132, "eval_steps_per_second": 1.017, "eval_wer": 0.0493808517064331, "step": 33000 }, { "epoch": 47.63, "learning_rate": 3.822137404580152e-06, "loss": 0.3153, "step": 33100 }, { "epoch": 47.77, "learning_rate": 3.593129770992366e-06, "loss": 0.3005, "step": 33200 }, { "epoch": 47.91, "learning_rate": 3.36412213740458e-06, "loss": 0.3089, "step": 33300 }, { "epoch": 48.06, "learning_rate": 3.135114503816794e-06, "loss": 0.3068, "step": 33400 }, { "epoch": 48.2, "learning_rate": 2.906106870229007e-06, "loss": 0.3033, "step": 33500 }, { "epoch": 48.2, "eval_loss": 0.06382497400045395, "eval_runtime": 57.4782, "eval_samples_per_second": 7.933, "eval_steps_per_second": 0.992, "eval_wer": 0.04560555723346421, "step": 33500 }, { "epoch": 48.34, "learning_rate": 2.677099236641221e-06, "loss": 0.3039, "step": 33600 }, { "epoch": 48.49, "learning_rate": 2.4480916030534347e-06, "loss": 0.2983, "step": 33700 }, { "epoch": 48.63, "learning_rate": 2.219083969465649e-06, "loss": 0.3069, "step": 33800 }, { "epoch": 48.78, "learning_rate": 1.9900763358778624e-06, "loss": 0.2987, "step": 33900 }, { "epoch": 48.92, "learning_rate": 1.761068702290076e-06, "loss": 0.3023, "step": 34000 }, { "epoch": 48.92, "eval_loss": 0.06439350545406342, "eval_runtime": 56.5994, "eval_samples_per_second": 8.057, "eval_steps_per_second": 1.007, "eval_wer": 0.04636061612805799, "step": 34000 }, { "epoch": 49.06, "learning_rate": 1.53206106870229e-06, "loss": 0.2919, "step": 34100 }, { "epoch": 49.21, "learning_rate": 1.3030534351145036e-06, "loss": 0.2912, "step": 34200 }, { "epoch": 49.35, "learning_rate": 1.0740458015267175e-06, "loss": 0.3036, "step": 34300 }, { "epoch": 49.5, "learning_rate": 8.450381679389312e-07, "loss": 0.3025, "step": 34400 }, { "epoch": 49.64, "learning_rate": 6.160305343511449e-07, "loss": 0.2975, "step": 34500 }, { "epoch": 49.64, "eval_loss": 0.06428828090429306, "eval_runtime": 56.8585, "eval_samples_per_second": 8.02, "eval_steps_per_second": 1.002, "eval_wer": 0.045454545454545456, "step": 34500 }, { "epoch": 49.78, "learning_rate": 3.8702290076335876e-07, "loss": 0.2955, "step": 34600 }, { "epoch": 49.93, "learning_rate": 1.580152671755725e-07, "loss": 0.2952, "step": 34700 }, { "epoch": 50.0, "step": 34750, "total_flos": 8.643412752890073e+20, "train_loss": 1.0736439298451375, "train_runtime": 194988.743, "train_samples_per_second": 5.709, "train_steps_per_second": 0.178 } ], "max_steps": 34750, "num_train_epochs": 50, "total_flos": 8.643412752890073e+20, "trial_name": null, "trial_params": null }