{ "best_metric": null, "best_model_checkpoint": null, "epoch": 37.90553745928339, "global_step": 5800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.65, "learning_rate": 0.0001, "loss": 1.0071, "step": 100 }, { "epoch": 0.65, "eval_loss": 0.3728577196598053, "eval_runtime": 169.6136, "eval_samples_per_second": 19.48, "eval_steps_per_second": 2.435, "eval_wer": 0.5494842925753878, "step": 100 }, { "epoch": 1.31, "learning_rate": 9.918652891889694e-05, "loss": 0.6823, "step": 200 }, { "epoch": 1.31, "eval_loss": 0.33882805705070496, "eval_runtime": 167.327, "eval_samples_per_second": 19.746, "eval_steps_per_second": 2.468, "eval_wer": 0.5344461066057791, "step": 200 }, { "epoch": 1.96, "learning_rate": 9.837305783779387e-05, "loss": 0.6063, "step": 300 }, { "epoch": 1.96, "eval_loss": 0.3208909332752228, "eval_runtime": 174.661, "eval_samples_per_second": 18.917, "eval_steps_per_second": 2.365, "eval_wer": 0.5098811117234864, "step": 300 }, { "epoch": 2.61, "learning_rate": 9.755958675669081e-05, "loss": 0.5326, "step": 400 }, { "epoch": 2.61, "eval_loss": 0.30963289737701416, "eval_runtime": 177.329, "eval_samples_per_second": 18.632, "eval_steps_per_second": 2.329, "eval_wer": 0.5025588536335721, "step": 400 }, { "epoch": 3.27, "learning_rate": 9.674611567558773e-05, "loss": 0.5074, "step": 500 }, { "epoch": 3.27, "eval_loss": 0.3118290901184082, "eval_runtime": 173.1863, "eval_samples_per_second": 19.078, "eval_steps_per_second": 2.385, "eval_wer": 0.4959452011652626, "step": 500 }, { "epoch": 3.92, "learning_rate": 9.593264459448467e-05, "loss": 0.446, "step": 600 }, { "epoch": 3.92, "eval_loss": 0.30445897579193115, "eval_runtime": 173.9457, "eval_samples_per_second": 18.994, "eval_steps_per_second": 2.374, "eval_wer": 0.4865758601684907, "step": 600 }, { "epoch": 4.57, "learning_rate": 9.51191735133816e-05, "loss": 0.4283, "step": 700 }, { "epoch": 4.57, "eval_loss": 0.30916285514831543, "eval_runtime": 176.2446, "eval_samples_per_second": 18.747, "eval_steps_per_second": 2.343, "eval_wer": 0.48366270372411624, "step": 700 }, { "epoch": 5.23, "learning_rate": 9.430570243227855e-05, "loss": 0.41, "step": 800 }, { "epoch": 5.23, "eval_loss": 0.35956883430480957, "eval_runtime": 169.7235, "eval_samples_per_second": 19.467, "eval_steps_per_second": 2.433, "eval_wer": 0.4932682465947563, "step": 800 }, { "epoch": 5.88, "learning_rate": 9.349223135117547e-05, "loss": 0.3802, "step": 900 }, { "epoch": 5.88, "eval_loss": 0.3234783411026001, "eval_runtime": 171.4522, "eval_samples_per_second": 19.271, "eval_steps_per_second": 2.409, "eval_wer": 0.47752145500354304, "step": 900 }, { "epoch": 6.53, "learning_rate": 9.267876027007241e-05, "loss": 0.3852, "step": 1000 }, { "epoch": 6.53, "eval_loss": 0.32342973351478577, "eval_runtime": 184.4459, "eval_samples_per_second": 17.913, "eval_steps_per_second": 2.239, "eval_wer": 0.47815132666719157, "step": 1000 }, { "epoch": 7.19, "learning_rate": 9.186528918896934e-05, "loss": 0.3539, "step": 1100 }, { "epoch": 7.19, "eval_loss": 0.33684083819389343, "eval_runtime": 170.8359, "eval_samples_per_second": 19.34, "eval_steps_per_second": 2.418, "eval_wer": 0.4796472718683568, "step": 1100 }, { "epoch": 7.84, "learning_rate": 9.105181810786628e-05, "loss": 0.3444, "step": 1200 }, { "epoch": 7.84, "eval_loss": 0.3268304169178009, "eval_runtime": 170.984, "eval_samples_per_second": 19.323, "eval_steps_per_second": 2.415, "eval_wer": 0.4732698212739154, "step": 1200 }, { "epoch": 8.5, "learning_rate": 9.02383470267632e-05, "loss": 0.336, "step": 1300 }, { "epoch": 8.5, "eval_loss": 0.34285250306129456, "eval_runtime": 171.7981, "eval_samples_per_second": 19.232, "eval_steps_per_second": 2.404, "eval_wer": 0.479883473742225, "step": 1300 }, { "epoch": 9.15, "learning_rate": 8.942487594566014e-05, "loss": 0.3041, "step": 1400 }, { "epoch": 9.15, "eval_loss": 0.35453349351882935, "eval_runtime": 172.0678, "eval_samples_per_second": 19.202, "eval_steps_per_second": 2.4, "eval_wer": 0.46248326903393433, "step": 1400 }, { "epoch": 9.8, "learning_rate": 8.861140486455706e-05, "loss": 0.3074, "step": 1500 }, { "epoch": 9.8, "eval_loss": 0.3339354693889618, "eval_runtime": 172.1803, "eval_samples_per_second": 19.189, "eval_steps_per_second": 2.399, "eval_wer": 0.46319187465553896, "step": 1500 }, { "epoch": 10.46, "learning_rate": 8.7797933783454e-05, "loss": 0.2948, "step": 1600 }, { "epoch": 10.46, "eval_loss": 0.34325212240219116, "eval_runtime": 171.9876, "eval_samples_per_second": 19.211, "eval_steps_per_second": 2.401, "eval_wer": 0.4646090858987481, "step": 1600 }, { "epoch": 11.11, "learning_rate": 8.698446270235093e-05, "loss": 0.2905, "step": 1700 }, { "epoch": 11.11, "eval_loss": 0.34282687306404114, "eval_runtime": 172.5719, "eval_samples_per_second": 19.146, "eval_steps_per_second": 2.393, "eval_wer": 0.4641366821510117, "step": 1700 }, { "epoch": 11.76, "learning_rate": 8.617099162124787e-05, "loss": 0.296, "step": 1800 }, { "epoch": 11.76, "eval_loss": 0.35734105110168457, "eval_runtime": 173.1126, "eval_samples_per_second": 19.086, "eval_steps_per_second": 2.386, "eval_wer": 0.4665774348476498, "step": 1800 }, { "epoch": 12.42, "learning_rate": 8.535752054014479e-05, "loss": 0.2669, "step": 1900 }, { "epoch": 12.42, "eval_loss": 0.34095147252082825, "eval_runtime": 172.9162, "eval_samples_per_second": 19.108, "eval_steps_per_second": 2.388, "eval_wer": 0.46224706716006614, "step": 1900 }, { "epoch": 13.07, "learning_rate": 8.454404945904173e-05, "loss": 0.2778, "step": 2000 }, { "epoch": 13.07, "eval_loss": 0.3445983827114105, "eval_runtime": 172.7429, "eval_samples_per_second": 19.127, "eval_steps_per_second": 2.391, "eval_wer": 0.4621683332021101, "step": 2000 }, { "epoch": 13.72, "learning_rate": 8.373057837793867e-05, "loss": 0.2605, "step": 2100 }, { "epoch": 13.72, "eval_loss": 0.364580363035202, "eval_runtime": 173.3351, "eval_samples_per_second": 19.061, "eval_steps_per_second": 2.383, "eval_wer": 0.4611447917486812, "step": 2100 }, { "epoch": 14.38, "learning_rate": 8.291710729683561e-05, "loss": 0.2562, "step": 2200 }, { "epoch": 14.38, "eval_loss": 0.3529307544231415, "eval_runtime": 173.4538, "eval_samples_per_second": 19.048, "eval_steps_per_second": 2.381, "eval_wer": 0.46201086528619795, "step": 2200 }, { "epoch": 15.03, "learning_rate": 8.210363621573253e-05, "loss": 0.2587, "step": 2300 }, { "epoch": 15.03, "eval_loss": 0.35722818970680237, "eval_runtime": 173.1723, "eval_samples_per_second": 19.079, "eval_steps_per_second": 2.385, "eval_wer": 0.4694118573340682, "step": 2300 }, { "epoch": 15.68, "learning_rate": 8.129016513462947e-05, "loss": 0.242, "step": 2400 }, { "epoch": 15.68, "eval_loss": 0.36534029245376587, "eval_runtime": 173.1065, "eval_samples_per_second": 19.087, "eval_steps_per_second": 2.386, "eval_wer": 0.45894024092591135, "step": 2400 }, { "epoch": 16.34, "learning_rate": 8.047669405352641e-05, "loss": 0.232, "step": 2500 }, { "epoch": 16.34, "eval_loss": 0.34964719414711, "eval_runtime": 174.2382, "eval_samples_per_second": 18.963, "eval_steps_per_second": 2.37, "eval_wer": 0.4605149200850327, "step": 2500 }, { "epoch": 16.99, "learning_rate": 7.966322297242333e-05, "loss": 0.2474, "step": 2600 }, { "epoch": 16.99, "eval_loss": 0.3596344590187073, "eval_runtime": 174.0298, "eval_samples_per_second": 18.985, "eval_steps_per_second": 2.373, "eval_wer": 0.46783717817494685, "step": 2600 }, { "epoch": 17.64, "learning_rate": 7.884975189132027e-05, "loss": 0.2137, "step": 2700 }, { "epoch": 17.64, "eval_loss": 0.3547351360321045, "eval_runtime": 174.6108, "eval_samples_per_second": 18.922, "eval_steps_per_second": 2.365, "eval_wer": 0.4609873238327691, "step": 2700 }, { "epoch": 18.3, "learning_rate": 7.80362808102172e-05, "loss": 0.2261, "step": 2800 }, { "epoch": 18.3, "eval_loss": 0.35713937878608704, "eval_runtime": 173.8691, "eval_samples_per_second": 19.003, "eval_steps_per_second": 2.375, "eval_wer": 0.4579954334304385, "step": 2800 }, { "epoch": 18.95, "learning_rate": 7.723094443992517e-05, "loss": 0.2141, "step": 2900 }, { "epoch": 18.95, "eval_loss": 0.36411064863204956, "eval_runtime": 174.3463, "eval_samples_per_second": 18.951, "eval_steps_per_second": 2.369, "eval_wer": 0.45563341469175656, "step": 2900 }, { "epoch": 19.61, "learning_rate": 7.64174733588221e-05, "loss": 0.2201, "step": 3000 }, { "epoch": 19.61, "eval_loss": 0.34566032886505127, "eval_runtime": 173.9331, "eval_samples_per_second": 18.996, "eval_steps_per_second": 2.374, "eval_wer": 0.45303519407920634, "step": 3000 }, { "epoch": 20.26, "learning_rate": 7.560400227771903e-05, "loss": 0.2243, "step": 3100 }, { "epoch": 20.26, "eval_loss": 0.3523178994655609, "eval_runtime": 174.1671, "eval_samples_per_second": 18.97, "eval_steps_per_second": 2.371, "eval_wer": 0.4571293598929218, "step": 3100 }, { "epoch": 20.91, "learning_rate": 7.479053119661597e-05, "loss": 0.1891, "step": 3200 }, { "epoch": 20.91, "eval_loss": 0.337533563375473, "eval_runtime": 174.612, "eval_samples_per_second": 18.922, "eval_steps_per_second": 2.365, "eval_wer": 0.4541374694905913, "step": 3200 }, { "epoch": 21.57, "learning_rate": 7.39770601155129e-05, "loss": 0.2033, "step": 3300 }, { "epoch": 21.57, "eval_loss": 0.3634466230869293, "eval_runtime": 174.6521, "eval_samples_per_second": 18.918, "eval_steps_per_second": 2.365, "eval_wer": 0.4579166994724825, "step": 3300 }, { "epoch": 22.22, "learning_rate": 7.316358903440983e-05, "loss": 0.2035, "step": 3400 }, { "epoch": 22.22, "eval_loss": 0.3793589174747467, "eval_runtime": 174.394, "eval_samples_per_second": 18.946, "eval_steps_per_second": 2.368, "eval_wer": 0.4555546807338005, "step": 3400 }, { "epoch": 22.87, "learning_rate": 7.235011795330676e-05, "loss": 0.1867, "step": 3500 }, { "epoch": 22.87, "eval_loss": 0.37910905480384827, "eval_runtime": 174.9971, "eval_samples_per_second": 18.88, "eval_steps_per_second": 2.36, "eval_wer": 0.454924809070152, "step": 3500 }, { "epoch": 23.53, "learning_rate": 7.15366468722037e-05, "loss": 0.1956, "step": 3600 }, { "epoch": 23.53, "eval_loss": 0.3568515479564667, "eval_runtime": 174.799, "eval_samples_per_second": 18.902, "eval_steps_per_second": 2.363, "eval_wer": 0.45760176364065824, "step": 3600 }, { "epoch": 24.18, "learning_rate": 7.072317579110062e-05, "loss": 0.1826, "step": 3700 }, { "epoch": 24.18, "eval_loss": 0.3747410178184509, "eval_runtime": 175.1918, "eval_samples_per_second": 18.859, "eval_steps_per_second": 2.357, "eval_wer": 0.4543736713644595, "step": 3700 }, { "epoch": 24.83, "learning_rate": 6.99178394208086e-05, "loss": 0.1867, "step": 3800 }, { "epoch": 24.83, "eval_loss": 0.36731651425361633, "eval_runtime": 175.3726, "eval_samples_per_second": 18.84, "eval_steps_per_second": 2.355, "eval_wer": 0.45366506574285487, "step": 3800 }, { "epoch": 25.49, "learning_rate": 6.910436833970553e-05, "loss": 0.1902, "step": 3900 }, { "epoch": 25.49, "eval_loss": 0.3835786283016205, "eval_runtime": 182.8434, "eval_samples_per_second": 18.07, "eval_steps_per_second": 2.259, "eval_wer": 0.4522478544996457, "step": 3900 }, { "epoch": 26.14, "learning_rate": 6.829089725860246e-05, "loss": 0.1786, "step": 4000 }, { "epoch": 26.14, "eval_loss": 0.3528241813182831, "eval_runtime": 182.8588, "eval_samples_per_second": 18.069, "eval_steps_per_second": 2.259, "eval_wer": 0.4485473584757106, "step": 4000 }, { "epoch": 26.79, "learning_rate": 6.74774261774994e-05, "loss": 0.178, "step": 4100 }, { "epoch": 26.79, "eval_loss": 0.3756342828273773, "eval_runtime": 183.4843, "eval_samples_per_second": 18.007, "eval_steps_per_second": 2.251, "eval_wer": 0.45303519407920634, "step": 4100 }, { "epoch": 27.45, "learning_rate": 6.666395509639632e-05, "loss": 0.1783, "step": 4200 }, { "epoch": 27.45, "eval_loss": 0.38552403450012207, "eval_runtime": 185.3761, "eval_samples_per_second": 17.823, "eval_steps_per_second": 2.228, "eval_wer": 0.4515392488780411, "step": 4200 }, { "epoch": 28.1, "learning_rate": 6.585048401529326e-05, "loss": 0.1747, "step": 4300 }, { "epoch": 28.1, "eval_loss": 0.3594723045825958, "eval_runtime": 184.1815, "eval_samples_per_second": 17.939, "eval_steps_per_second": 2.242, "eval_wer": 0.4475238170222817, "step": 4300 }, { "epoch": 28.76, "learning_rate": 6.503701293419018e-05, "loss": 0.1776, "step": 4400 }, { "epoch": 28.76, "eval_loss": 0.3899536728858948, "eval_runtime": 183.8028, "eval_samples_per_second": 17.976, "eval_steps_per_second": 2.247, "eval_wer": 0.45303519407920634, "step": 4400 }, { "epoch": 29.41, "learning_rate": 6.422354185308712e-05, "loss": 0.1615, "step": 4500 }, { "epoch": 29.41, "eval_loss": 0.37925612926483154, "eval_runtime": 184.3645, "eval_samples_per_second": 17.921, "eval_steps_per_second": 2.24, "eval_wer": 0.4487048263916227, "step": 4500 }, { "epoch": 30.07, "learning_rate": 6.341007077198405e-05, "loss": 0.1665, "step": 4600 }, { "epoch": 30.07, "eval_loss": 0.3769548237323761, "eval_runtime": 185.5661, "eval_samples_per_second": 17.805, "eval_steps_per_second": 2.226, "eval_wer": 0.4504369734666562, "step": 4600 }, { "epoch": 30.72, "learning_rate": 6.2596599690881e-05, "loss": 0.1562, "step": 4700 }, { "epoch": 30.72, "eval_loss": 0.38725826144218445, "eval_runtime": 184.2843, "eval_samples_per_second": 17.929, "eval_steps_per_second": 2.241, "eval_wer": 0.45090937721439256, "step": 4700 }, { "epoch": 31.37, "learning_rate": 6.178312860977793e-05, "loss": 0.1558, "step": 4800 }, { "epoch": 31.37, "eval_loss": 0.37403690814971924, "eval_runtime": 184.7842, "eval_samples_per_second": 17.88, "eval_steps_per_second": 2.235, "eval_wer": 0.4494134320132273, "step": 4800 }, { "epoch": 32.03, "learning_rate": 6.0969657528674864e-05, "loss": 0.1574, "step": 4900 }, { "epoch": 32.03, "eval_loss": 0.38782382011413574, "eval_runtime": 185.4497, "eval_samples_per_second": 17.816, "eval_steps_per_second": 2.227, "eval_wer": 0.44933469805527126, "step": 4900 }, { "epoch": 32.68, "learning_rate": 6.0156186447571796e-05, "loss": 0.152, "step": 5000 }, { "epoch": 32.68, "eval_loss": 0.36702463030815125, "eval_runtime": 184.067, "eval_samples_per_second": 17.95, "eval_steps_per_second": 2.244, "eval_wer": 0.44933469805527126, "step": 5000 }, { "epoch": 33.33, "learning_rate": 5.934271536646873e-05, "loss": 0.1477, "step": 5100 }, { "epoch": 33.33, "eval_loss": 0.36524683237075806, "eval_runtime": 186.7144, "eval_samples_per_second": 17.695, "eval_steps_per_second": 2.212, "eval_wer": 0.4496496338870955, "step": 5100 }, { "epoch": 33.98, "learning_rate": 5.852924428536566e-05, "loss": 0.1561, "step": 5200 }, { "epoch": 33.98, "eval_loss": 0.3987789452075958, "eval_runtime": 177.1487, "eval_samples_per_second": 18.651, "eval_steps_per_second": 2.331, "eval_wer": 0.4535863317848988, "step": 5200 }, { "epoch": 34.64, "learning_rate": 5.771577320426259e-05, "loss": 0.1441, "step": 5300 }, { "epoch": 34.64, "eval_loss": 0.37290704250335693, "eval_runtime": 178.4122, "eval_samples_per_second": 18.519, "eval_steps_per_second": 2.315, "eval_wer": 0.4471301472325014, "step": 5300 }, { "epoch": 35.29, "learning_rate": 5.691043683397056e-05, "loss": 0.1462, "step": 5400 }, { "epoch": 35.29, "eval_loss": 0.3913721740245819, "eval_runtime": 175.0751, "eval_samples_per_second": 18.872, "eval_steps_per_second": 2.359, "eval_wer": 0.4488622943075348, "step": 5400 }, { "epoch": 35.94, "learning_rate": 5.6096965752867494e-05, "loss": 0.1388, "step": 5500 }, { "epoch": 35.94, "eval_loss": 0.3886808454990387, "eval_runtime": 175.0289, "eval_samples_per_second": 18.877, "eval_steps_per_second": 2.36, "eval_wer": 0.44807495472797415, "step": 5500 }, { "epoch": 36.6, "learning_rate": 5.5283494671764426e-05, "loss": 0.1362, "step": 5600 }, { "epoch": 36.6, "eval_loss": 0.3816515803337097, "eval_runtime": 175.1136, "eval_samples_per_second": 18.868, "eval_steps_per_second": 2.358, "eval_wer": 0.445476734115424, "step": 5600 }, { "epoch": 37.25, "learning_rate": 5.447002359066136e-05, "loss": 0.1439, "step": 5700 }, { "epoch": 37.25, "eval_loss": 0.39244014024734497, "eval_runtime": 175.9891, "eval_samples_per_second": 18.774, "eval_steps_per_second": 2.347, "eval_wer": 0.446657743484765, "step": 5700 }, { "epoch": 37.91, "learning_rate": 5.365655250955829e-05, "loss": 0.1299, "step": 5800 }, { "epoch": 37.91, "eval_loss": 0.3693729341030121, "eval_runtime": 176.2438, "eval_samples_per_second": 18.747, "eval_steps_per_second": 2.343, "eval_wer": 0.4430359814187859, "step": 5800 } ], "max_steps": 12393, "num_train_epochs": 81, "total_flos": 2.5038038866869117e+19, "trial_name": null, "trial_params": null }