{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.95402298850575, "global_step": 8692, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.57, "learning_rate": 9.123417721518986e-06, "loss": 14.6554, "step": 100 }, { "epoch": 1.15, "learning_rate": 1.766772151898734e-05, "loss": 6.292, "step": 200 }, { "epoch": 1.72, "learning_rate": 2.6212025316455694e-05, "loss": 4.5602, "step": 300 }, { "epoch": 2.3, "learning_rate": 3.475632911392405e-05, "loss": 3.675, "step": 400 }, { "epoch": 2.3, "eval_cer": 1.0, "eval_loss": 3.505185604095459, "eval_runtime": 388.8147, "eval_samples_per_second": 19.927, "eval_steps_per_second": 0.314, "eval_wer": 1.0, "step": 400 }, { "epoch": 2.87, "learning_rate": 4.3300632911392406e-05, "loss": 3.4053, "step": 500 }, { "epoch": 3.45, "learning_rate": 5.184493670886076e-05, "loss": 3.3269, "step": 600 }, { "epoch": 4.02, "learning_rate": 6.038924050632911e-05, "loss": 3.2702, "step": 700 }, { "epoch": 4.6, "learning_rate": 6.893354430379747e-05, "loss": 3.0446, "step": 800 }, { "epoch": 4.6, "eval_cer": 0.52146416703672, "eval_loss": 2.2759008407592773, "eval_runtime": 391.3771, "eval_samples_per_second": 19.797, "eval_steps_per_second": 0.312, "eval_wer": 1.0052476497825171, "step": 800 }, { "epoch": 5.17, "learning_rate": 7.5e-05, "loss": 2.3045, "step": 900 }, { "epoch": 5.75, "learning_rate": 7.5e-05, "loss": 1.9583, "step": 1000 }, { "epoch": 6.32, "learning_rate": 7.5e-05, "loss": 1.8142, "step": 1100 }, { "epoch": 6.9, "learning_rate": 7.5e-05, "loss": 1.7276, "step": 1200 }, { "epoch": 6.9, "eval_cer": 0.19686409281152592, "eval_loss": 0.7083391547203064, "eval_runtime": 386.6895, "eval_samples_per_second": 20.037, "eval_steps_per_second": 0.315, "eval_wer": 0.6697067489827417, "step": 1200 }, { "epoch": 7.47, "learning_rate": 7.5e-05, "loss": 1.6571, "step": 1300 }, { "epoch": 8.05, "learning_rate": 7.5e-05, "loss": 1.6004, "step": 1400 }, { "epoch": 8.62, "learning_rate": 7.5e-05, "loss": 1.5572, "step": 1500 }, { "epoch": 9.2, "learning_rate": 7.5e-05, "loss": 1.5171, "step": 1600 }, { "epoch": 9.2, "eval_cer": 0.15683826416017196, "eval_loss": 0.5328246355056763, "eval_runtime": 388.9054, "eval_samples_per_second": 19.923, "eval_steps_per_second": 0.314, "eval_wer": 0.5733408166128806, "step": 1600 }, { "epoch": 9.77, "learning_rate": 7.5e-05, "loss": 1.4951, "step": 1700 }, { "epoch": 10.34, "learning_rate": 7.5e-05, "loss": 1.4553, "step": 1800 }, { "epoch": 10.92, "learning_rate": 7.5e-05, "loss": 1.4348, "step": 1900 }, { "epoch": 11.49, "learning_rate": 7.5e-05, "loss": 1.4176, "step": 2000 }, { "epoch": 11.49, "eval_cer": 0.13810176574442937, "eval_loss": 0.4571371376514435, "eval_runtime": 390.2121, "eval_samples_per_second": 19.856, "eval_steps_per_second": 0.313, "eval_wer": 0.5160656657780273, "step": 2000 }, { "epoch": 12.07, "learning_rate": 7.5e-05, "loss": 1.3988, "step": 2100 }, { "epoch": 12.64, "learning_rate": 7.5e-05, "loss": 1.3666, "step": 2200 }, { "epoch": 13.22, "learning_rate": 7.5e-05, "loss": 1.3585, "step": 2300 }, { "epoch": 13.79, "learning_rate": 7.5e-05, "loss": 1.343, "step": 2400 }, { "epoch": 13.79, "eval_cer": 0.11596506696356744, "eval_loss": 0.3910418748855591, "eval_runtime": 386.6143, "eval_samples_per_second": 20.041, "eval_steps_per_second": 0.316, "eval_wer": 0.45223796828960294, "step": 2400 }, { "epoch": 14.37, "learning_rate": 7.5e-05, "loss": 1.3196, "step": 2500 }, { "epoch": 14.94, "learning_rate": 7.5e-05, "loss": 1.3085, "step": 2600 }, { "epoch": 15.52, "learning_rate": 7.5e-05, "loss": 1.3006, "step": 2700 }, { "epoch": 16.09, "learning_rate": 7.5e-05, "loss": 1.2743, "step": 2800 }, { "epoch": 16.09, "eval_cer": 0.10436362622251684, "eval_loss": 0.3533952534198761, "eval_runtime": 386.8689, "eval_samples_per_second": 20.027, "eval_steps_per_second": 0.315, "eval_wer": 0.41365230812403536, "step": 2800 }, { "epoch": 16.67, "learning_rate": 7.5e-05, "loss": 1.256, "step": 2900 }, { "epoch": 17.24, "learning_rate": 7.5e-05, "loss": 1.2627, "step": 3000 }, { "epoch": 17.82, "learning_rate": 7.5e-05, "loss": 1.2496, "step": 3100 }, { "epoch": 18.39, "learning_rate": 7.5e-05, "loss": 1.2396, "step": 3200 }, { "epoch": 18.39, "eval_cer": 0.09588994077001131, "eval_loss": 0.3277980387210846, "eval_runtime": 388.8491, "eval_samples_per_second": 19.925, "eval_steps_per_second": 0.314, "eval_wer": 0.38773677564192505, "step": 3200 }, { "epoch": 18.97, "learning_rate": 7.5e-05, "loss": 1.2291, "step": 3300 }, { "epoch": 19.54, "learning_rate": 7.5e-05, "loss": 1.2254, "step": 3400 }, { "epoch": 20.11, "learning_rate": 7.5e-05, "loss": 1.2063, "step": 3500 }, { "epoch": 20.69, "learning_rate": 7.5e-05, "loss": 1.2035, "step": 3600 }, { "epoch": 20.69, "eval_cer": 0.09172603609575479, "eval_loss": 0.31091800332069397, "eval_runtime": 384.3384, "eval_samples_per_second": 20.159, "eval_steps_per_second": 0.317, "eval_wer": 0.3740844675178897, "step": 3600 }, { "epoch": 21.26, "learning_rate": 7.5e-05, "loss": 1.1937, "step": 3700 }, { "epoch": 21.84, "learning_rate": 7.5e-05, "loss": 1.1787, "step": 3800 }, { "epoch": 22.41, "learning_rate": 7.5e-05, "loss": 1.1779, "step": 3900 }, { "epoch": 22.99, "learning_rate": 7.5e-05, "loss": 1.1745, "step": 4000 }, { "epoch": 22.99, "eval_cer": 0.08818425010028982, "eval_loss": 0.2971595227718353, "eval_runtime": 385.6559, "eval_samples_per_second": 20.09, "eval_steps_per_second": 0.316, "eval_wer": 0.3618212431598148, "step": 4000 }, { "epoch": 23.56, "learning_rate": 7.5e-05, "loss": 1.1697, "step": 4100 }, { "epoch": 24.14, "learning_rate": 7.5e-05, "loss": 1.1506, "step": 4200 }, { "epoch": 24.71, "learning_rate": 7.5e-05, "loss": 1.1512, "step": 4300 }, { "epoch": 25.29, "learning_rate": 7.414768806073154e-05, "loss": 1.1541, "step": 4400 }, { "epoch": 25.29, "eval_cer": 0.08323089827501506, "eval_loss": 0.28362834453582764, "eval_runtime": 385.7883, "eval_samples_per_second": 20.084, "eval_steps_per_second": 0.316, "eval_wer": 0.3427108180159955, "step": 4400 }, { "epoch": 25.86, "learning_rate": 7.250862663906142e-05, "loss": 1.1464, "step": 4500 }, { "epoch": 26.44, "learning_rate": 7.08695652173913e-05, "loss": 1.1395, "step": 4600 }, { "epoch": 27.01, "learning_rate": 6.923050379572118e-05, "loss": 1.1432, "step": 4700 }, { "epoch": 27.59, "learning_rate": 6.759144237405107e-05, "loss": 1.1372, "step": 4800 }, { "epoch": 27.59, "eval_cer": 0.08124869945017581, "eval_loss": 0.2759494483470917, "eval_runtime": 383.6861, "eval_samples_per_second": 20.194, "eval_steps_per_second": 0.318, "eval_wer": 0.3357373368878911, "step": 4800 }, { "epoch": 28.16, "learning_rate": 6.595238095238095e-05, "loss": 1.1179, "step": 4900 }, { "epoch": 28.74, "learning_rate": 6.431331953071083e-05, "loss": 1.1221, "step": 5000 }, { "epoch": 29.31, "learning_rate": 6.267425810904071e-05, "loss": 1.116, "step": 5100 }, { "epoch": 29.89, "learning_rate": 6.103519668737059e-05, "loss": 1.1048, "step": 5200 }, { "epoch": 29.89, "eval_cer": 0.07828827263385742, "eval_loss": 0.2669001519680023, "eval_runtime": 386.0726, "eval_samples_per_second": 20.069, "eval_steps_per_second": 0.316, "eval_wer": 0.32842710818015997, "step": 5200 }, { "epoch": 30.46, "learning_rate": 5.939613526570048e-05, "loss": 1.1005, "step": 5300 }, { "epoch": 31.03, "learning_rate": 5.775707384403037e-05, "loss": 1.1056, "step": 5400 }, { "epoch": 31.61, "learning_rate": 5.6118012422360246e-05, "loss": 1.093, "step": 5500 }, { "epoch": 32.18, "learning_rate": 5.4478951000690126e-05, "loss": 1.0966, "step": 5600 }, { "epoch": 32.18, "eval_cer": 0.07754173021930756, "eval_loss": 0.2677817642688751, "eval_runtime": 386.0716, "eval_samples_per_second": 20.069, "eval_steps_per_second": 0.316, "eval_wer": 0.32491932089238107, "step": 5600 }, { "epoch": 32.76, "learning_rate": 5.283988957902001e-05, "loss": 1.0884, "step": 5700 }, { "epoch": 33.33, "learning_rate": 5.120082815734989e-05, "loss": 1.0878, "step": 5800 }, { "epoch": 33.91, "learning_rate": 4.956176673567977e-05, "loss": 1.0803, "step": 5900 }, { "epoch": 34.48, "learning_rate": 4.792270531400966e-05, "loss": 1.0747, "step": 6000 }, { "epoch": 34.48, "eval_cer": 0.07477866519074373, "eval_loss": 0.25474071502685547, "eval_runtime": 385.0232, "eval_samples_per_second": 20.123, "eval_steps_per_second": 0.317, "eval_wer": 0.31337168514101305, "step": 6000 }, { "epoch": 35.06, "learning_rate": 4.628364389233954e-05, "loss": 1.0704, "step": 6100 }, { "epoch": 35.63, "learning_rate": 4.464458247066942e-05, "loss": 1.0707, "step": 6200 }, { "epoch": 36.21, "learning_rate": 4.30055210489993e-05, "loss": 1.0599, "step": 6300 }, { "epoch": 36.78, "learning_rate": 4.136645962732919e-05, "loss": 1.0593, "step": 6400 }, { "epoch": 36.78, "eval_cer": 0.07282435444460891, "eval_loss": 0.2490725815296173, "eval_runtime": 384.9593, "eval_samples_per_second": 20.127, "eval_steps_per_second": 0.317, "eval_wer": 0.3077311631822646, "step": 6400 }, { "epoch": 37.36, "learning_rate": 3.972739820565908e-05, "loss": 1.0572, "step": 6500 }, { "epoch": 37.93, "learning_rate": 3.808833678398896e-05, "loss": 1.0506, "step": 6600 }, { "epoch": 38.51, "learning_rate": 3.644927536231883e-05, "loss": 1.0478, "step": 6700 }, { "epoch": 39.08, "learning_rate": 3.4810213940648726e-05, "loss": 1.0417, "step": 6800 }, { "epoch": 39.08, "eval_cer": 0.07105667930211156, "eval_loss": 0.24495387077331543, "eval_runtime": 382.2473, "eval_samples_per_second": 20.27, "eval_steps_per_second": 0.319, "eval_wer": 0.301220709976147, "step": 6800 }, { "epoch": 39.66, "learning_rate": 3.3171152518978605e-05, "loss": 1.0359, "step": 6900 }, { "epoch": 40.23, "learning_rate": 3.153209109730849e-05, "loss": 1.0389, "step": 7000 }, { "epoch": 40.8, "learning_rate": 2.989302967563837e-05, "loss": 1.0365, "step": 7100 }, { "epoch": 41.38, "learning_rate": 2.8253968253968247e-05, "loss": 1.024, "step": 7200 }, { "epoch": 41.38, "eval_cer": 0.06938768505349148, "eval_loss": 0.24019765853881836, "eval_runtime": 381.671, "eval_samples_per_second": 20.3, "eval_steps_per_second": 0.32, "eval_wer": 0.29560825031570087, "step": 7200 }, { "epoch": 41.95, "learning_rate": 2.6614906832298136e-05, "loss": 1.0288, "step": 7300 }, { "epoch": 42.53, "learning_rate": 2.4975845410628016e-05, "loss": 1.0283, "step": 7400 }, { "epoch": 43.1, "learning_rate": 2.3353174603174605e-05, "loss": 1.0153, "step": 7500 }, { "epoch": 43.68, "learning_rate": 2.1714113181504485e-05, "loss": 1.0106, "step": 7600 }, { "epoch": 43.68, "eval_cer": 0.06811555961720395, "eval_loss": 0.23507660627365112, "eval_runtime": 383.1253, "eval_samples_per_second": 20.223, "eval_steps_per_second": 0.318, "eval_wer": 0.29151115476357514, "step": 7600 }, { "epoch": 44.25, "learning_rate": 2.007505175983437e-05, "loss": 1.013, "step": 7700 }, { "epoch": 44.83, "learning_rate": 1.843599033816425e-05, "loss": 1.0067, "step": 7800 }, { "epoch": 45.4, "learning_rate": 1.6796928916494137e-05, "loss": 1.0076, "step": 7900 }, { "epoch": 45.98, "learning_rate": 1.5157867494824016e-05, "loss": 1.0014, "step": 8000 }, { "epoch": 45.98, "eval_cer": 0.06731109580842178, "eval_loss": 0.23282095789909363, "eval_runtime": 382.9081, "eval_samples_per_second": 20.235, "eval_steps_per_second": 0.319, "eval_wer": 0.2896309807773257, "step": 8000 }, { "epoch": 46.55, "learning_rate": 1.3518806073153895e-05, "loss": 0.9955, "step": 8100 }, { "epoch": 47.13, "learning_rate": 1.1879744651483783e-05, "loss": 0.9998, "step": 8200 }, { "epoch": 47.7, "learning_rate": 1.0240683229813662e-05, "loss": 0.9956, "step": 8300 }, { "epoch": 48.28, "learning_rate": 8.60162180814355e-06, "loss": 0.9999, "step": 8400 }, { "epoch": 48.28, "eval_cer": 0.06668254141915997, "eval_loss": 0.2318294197320938, "eval_runtime": 383.814, "eval_samples_per_second": 20.187, "eval_steps_per_second": 0.318, "eval_wer": 0.2865862214115336, "step": 8400 }, { "epoch": 48.85, "learning_rate": 6.962560386473429e-06, "loss": 0.9898, "step": 8500 }, { "epoch": 49.43, "learning_rate": 5.323498964803316e-06, "loss": 0.997, "step": 8600 }, { "epoch": 49.95, "step": 8692, "total_flos": 2.038617513286856e+20, "train_loss": 1.5808194972818148, "train_runtime": 72373.5093, "train_samples_per_second": 15.373, "train_steps_per_second": 0.12 } ], "max_steps": 8692, "num_train_epochs": 50, "total_flos": 2.038617513286856e+20, "trial_name": null, "trial_params": null }