{ "best_metric": 0.12852737307548523, "best_model_checkpoint": "./checkpoint-10000", "epoch": 64.51323175621492, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.64, "learning_rate": 8.4824e-06, "loss": 6.9402, "step": 100 }, { "epoch": 1.29, "learning_rate": 1.6402400000000004e-05, "loss": 3.2608, "step": 200 }, { "epoch": 1.93, "learning_rate": 2.4322400000000003e-05, "loss": 2.4251, "step": 300 }, { "epoch": 2.58, "learning_rate": 3.22424e-05, "loss": 1.4757, "step": 400 }, { "epoch": 3.22, "learning_rate": 4.01624e-05, "loss": 1.2323, "step": 500 }, { "epoch": 3.22, "eval_cer": 0.07965777210531003, "eval_loss": 0.28158774971961975, "eval_runtime": 197.0167, "eval_samples_per_second": 29.449, "eval_steps_per_second": 0.462, "eval_wer": 0.41325687856906884, "step": 500 }, { "epoch": 3.87, "learning_rate": 4.80824e-05, "loss": 1.1608, "step": 600 }, { "epoch": 4.51, "learning_rate": 5.600240000000001e-05, "loss": 1.1297, "step": 700 }, { "epoch": 5.16, "learning_rate": 6.39224e-05, "loss": 1.0738, "step": 800 }, { "epoch": 5.8, "learning_rate": 7.184240000000001e-05, "loss": 1.0863, "step": 900 }, { "epoch": 6.45, "learning_rate": 7.97624e-05, "loss": 0.9826, "step": 1000 }, { "epoch": 6.45, "eval_cer": 0.05135273515182096, "eval_loss": 0.19702914357185364, "eval_runtime": 195.781, "eval_samples_per_second": 29.635, "eval_steps_per_second": 0.465, "eval_wer": 0.26877153239888624, "step": 1000 }, { "epoch": 7.1, "learning_rate": 8e-05, "loss": 0.9708, "step": 1100 }, { "epoch": 7.74, "learning_rate": 8e-05, "loss": 0.917, "step": 1200 }, { "epoch": 8.38, "learning_rate": 8e-05, "loss": 0.888, "step": 1300 }, { "epoch": 9.03, "learning_rate": 8e-05, "loss": 0.9048, "step": 1400 }, { "epoch": 9.67, "learning_rate": 8e-05, "loss": 0.8628, "step": 1500 }, { "epoch": 9.67, "eval_cer": 0.04743848505535603, "eval_loss": 0.16490551829338074, "eval_runtime": 196.0899, "eval_samples_per_second": 29.588, "eval_steps_per_second": 0.464, "eval_wer": 0.24850158100901412, "step": 1500 }, { "epoch": 10.32, "learning_rate": 8e-05, "loss": 0.8616, "step": 1600 }, { "epoch": 10.96, "learning_rate": 8e-05, "loss": 0.8517, "step": 1700 }, { "epoch": 11.61, "learning_rate": 8e-05, "loss": 0.8455, "step": 1800 }, { "epoch": 12.26, "learning_rate": 8e-05, "loss": 0.8436, "step": 1900 }, { "epoch": 12.9, "learning_rate": 8e-05, "loss": 0.8348, "step": 2000 }, { "epoch": 12.9, "eval_cer": 0.046703867501706686, "eval_loss": 0.16045768558979034, "eval_runtime": 196.1747, "eval_samples_per_second": 29.576, "eval_steps_per_second": 0.464, "eval_wer": 0.24604747746472228, "step": 2000 }, { "epoch": 13.55, "learning_rate": 8e-05, "loss": 0.844, "step": 2100 }, { "epoch": 14.19, "learning_rate": 8e-05, "loss": 0.8369, "step": 2200 }, { "epoch": 14.83, "learning_rate": 8e-05, "loss": 0.8241, "step": 2300 }, { "epoch": 15.48, "learning_rate": 8e-05, "loss": 0.8235, "step": 2400 }, { "epoch": 16.13, "learning_rate": 8e-05, "loss": 0.8186, "step": 2500 }, { "epoch": 16.13, "eval_cer": 0.04690421774361105, "eval_loss": 0.1608021855354309, "eval_runtime": 196.173, "eval_samples_per_second": 29.576, "eval_steps_per_second": 0.464, "eval_wer": 0.24692057199490303, "step": 2500 }, { "epoch": 16.77, "learning_rate": 8e-05, "loss": 0.8355, "step": 2600 }, { "epoch": 17.42, "learning_rate": 8e-05, "loss": 0.8157, "step": 2700 }, { "epoch": 18.06, "learning_rate": 8e-05, "loss": 0.8175, "step": 2800 }, { "epoch": 18.71, "learning_rate": 8e-05, "loss": 0.801, "step": 2900 }, { "epoch": 19.35, "learning_rate": 8e-05, "loss": 0.8011, "step": 3000 }, { "epoch": 19.35, "eval_cer": 0.046789201863999284, "eval_loss": 0.1620311141014099, "eval_runtime": 197.3892, "eval_samples_per_second": 29.394, "eval_steps_per_second": 0.461, "eval_wer": 0.24118646467506724, "step": 3000 }, { "epoch": 19.99, "learning_rate": 8e-05, "loss": 0.7888, "step": 3100 }, { "epoch": 20.64, "learning_rate": 8e-05, "loss": 0.8008, "step": 3200 }, { "epoch": 21.29, "learning_rate": 8e-05, "loss": 0.8197, "step": 3300 }, { "epoch": 21.93, "learning_rate": 8e-05, "loss": 0.8065, "step": 3400 }, { "epoch": 22.58, "learning_rate": 8e-05, "loss": 0.807, "step": 3500 }, { "epoch": 22.58, "eval_cer": 0.049805586061559465, "eval_loss": 0.17369326949119568, "eval_runtime": 196.0869, "eval_samples_per_second": 29.589, "eval_steps_per_second": 0.464, "eval_wer": 0.252395110670631, "step": 3500 }, { "epoch": 23.22, "learning_rate": 8e-05, "loss": 0.8045, "step": 3600 }, { "epoch": 23.87, "learning_rate": 8e-05, "loss": 0.7925, "step": 3700 }, { "epoch": 24.51, "learning_rate": 8e-05, "loss": 0.8046, "step": 3800 }, { "epoch": 25.16, "learning_rate": 8e-05, "loss": 0.8102, "step": 3900 }, { "epoch": 25.8, "learning_rate": 8e-05, "loss": 0.7758, "step": 4000 }, { "epoch": 25.8, "eval_cer": 0.04979074530289988, "eval_loss": 0.1708839237689972, "eval_runtime": 196.4196, "eval_samples_per_second": 29.539, "eval_steps_per_second": 0.463, "eval_wer": 0.2535985652933126, "step": 4000 }, { "epoch": 26.45, "learning_rate": 8e-05, "loss": 0.7968, "step": 4100 }, { "epoch": 27.1, "learning_rate": 8e-05, "loss": 0.7904, "step": 4200 }, { "epoch": 27.74, "learning_rate": 8e-05, "loss": 0.8001, "step": 4300 }, { "epoch": 28.38, "learning_rate": 8e-05, "loss": 0.7869, "step": 4400 }, { "epoch": 29.03, "learning_rate": 8e-05, "loss": 0.7923, "step": 4500 }, { "epoch": 29.03, "eval_cer": 0.04736799145172301, "eval_loss": 0.16446976363658905, "eval_runtime": 196.4759, "eval_samples_per_second": 29.53, "eval_steps_per_second": 0.463, "eval_wer": 0.24356977677096606, "step": 4500 }, { "epoch": 29.67, "learning_rate": 8e-05, "loss": 0.772, "step": 4600 }, { "epoch": 30.32, "learning_rate": 8e-05, "loss": 0.7702, "step": 4700 }, { "epoch": 30.96, "learning_rate": 8e-05, "loss": 0.7797, "step": 4800 }, { "epoch": 31.61, "learning_rate": 8e-05, "loss": 0.7759, "step": 4900 }, { "epoch": 32.26, "learning_rate": 8e-05, "loss": 0.7717, "step": 5000 }, { "epoch": 32.26, "eval_cer": 0.052350776171677896, "eval_loss": 0.1811000257730484, "eval_runtime": 196.7068, "eval_samples_per_second": 29.496, "eval_steps_per_second": 0.463, "eval_wer": 0.26355656236726605, "step": 5000 }, { "epoch": 32.9, "learning_rate": 7.852560000000001e-05, "loss": 0.7608, "step": 5100 }, { "epoch": 33.55, "learning_rate": 7.700560000000001e-05, "loss": 0.763, "step": 5200 }, { "epoch": 34.19, "learning_rate": 7.54856e-05, "loss": 0.7712, "step": 5300 }, { "epoch": 34.83, "learning_rate": 7.39656e-05, "loss": 0.7478, "step": 5400 }, { "epoch": 35.48, "learning_rate": 7.24456e-05, "loss": 0.7447, "step": 5500 }, { "epoch": 35.48, "eval_cer": 0.04679662224332908, "eval_loss": 0.16353937983512878, "eval_runtime": 196.1767, "eval_samples_per_second": 29.575, "eval_steps_per_second": 0.464, "eval_wer": 0.2404785501911369, "step": 5500 }, { "epoch": 36.13, "learning_rate": 7.09256e-05, "loss": 0.7544, "step": 5600 }, { "epoch": 36.77, "learning_rate": 6.94056e-05, "loss": 0.7438, "step": 5700 }, { "epoch": 37.42, "learning_rate": 6.79008e-05, "loss": 0.742, "step": 5800 }, { "epoch": 38.06, "learning_rate": 6.638080000000001e-05, "loss": 0.7441, "step": 5900 }, { "epoch": 38.71, "learning_rate": 6.486080000000001e-05, "loss": 0.7267, "step": 6000 }, { "epoch": 38.71, "eval_cer": 0.046236383603929836, "eval_loss": 0.15783575177192688, "eval_runtime": 197.1092, "eval_samples_per_second": 29.435, "eval_steps_per_second": 0.462, "eval_wer": 0.23542876020576714, "step": 6000 }, { "epoch": 39.35, "learning_rate": 6.33408e-05, "loss": 0.7112, "step": 6100 }, { "epoch": 39.99, "learning_rate": 6.18208e-05, "loss": 0.7052, "step": 6200 }, { "epoch": 40.64, "learning_rate": 6.0300800000000004e-05, "loss": 0.7105, "step": 6300 }, { "epoch": 41.29, "learning_rate": 5.878080000000001e-05, "loss": 0.7107, "step": 6400 }, { "epoch": 41.93, "learning_rate": 5.72608e-05, "loss": 0.7046, "step": 6500 }, { "epoch": 41.93, "eval_cer": 0.044429521237125645, "eval_loss": 0.15552951395511627, "eval_runtime": 196.7222, "eval_samples_per_second": 29.493, "eval_steps_per_second": 0.463, "eval_wer": 0.22957666713860966, "step": 6500 }, { "epoch": 42.58, "learning_rate": 5.574080000000001e-05, "loss": 0.7035, "step": 6600 }, { "epoch": 43.22, "learning_rate": 5.422080000000001e-05, "loss": 0.6967, "step": 6700 }, { "epoch": 43.87, "learning_rate": 5.271600000000001e-05, "loss": 0.687, "step": 6800 }, { "epoch": 44.51, "learning_rate": 5.1196e-05, "loss": 0.6875, "step": 6900 }, { "epoch": 45.16, "learning_rate": 4.967600000000001e-05, "loss": 0.6896, "step": 7000 }, { "epoch": 45.16, "eval_cer": 0.043932355822029624, "eval_loss": 0.15479956567287445, "eval_runtime": 196.5953, "eval_samples_per_second": 29.512, "eval_steps_per_second": 0.463, "eval_wer": 0.2271697578932465, "step": 7000 }, { "epoch": 45.8, "learning_rate": 4.8156000000000004e-05, "loss": 0.6722, "step": 7100 }, { "epoch": 46.45, "learning_rate": 4.663600000000001e-05, "loss": 0.6816, "step": 7200 }, { "epoch": 47.1, "learning_rate": 4.5116000000000006e-05, "loss": 0.6658, "step": 7300 }, { "epoch": 47.74, "learning_rate": 4.359600000000001e-05, "loss": 0.6507, "step": 7400 }, { "epoch": 48.38, "learning_rate": 4.207600000000001e-05, "loss": 0.6575, "step": 7500 }, { "epoch": 48.38, "eval_cer": 0.03991422041494761, "eval_loss": 0.14319901168346405, "eval_runtime": 196.3465, "eval_samples_per_second": 29.55, "eval_steps_per_second": 0.463, "eval_wer": 0.2096370758412384, "step": 7500 }, { "epoch": 49.03, "learning_rate": 4.0556e-05, "loss": 0.6524, "step": 7600 }, { "epoch": 49.67, "learning_rate": 3.9036000000000004e-05, "loss": 0.6336, "step": 7700 }, { "epoch": 50.32, "learning_rate": 3.751600000000001e-05, "loss": 0.6335, "step": 7800 }, { "epoch": 50.96, "learning_rate": 3.5996000000000006e-05, "loss": 0.6356, "step": 7900 }, { "epoch": 51.61, "learning_rate": 3.447600000000001e-05, "loss": 0.6264, "step": 8000 }, { "epoch": 51.61, "eval_cer": 0.039750972069692206, "eval_loss": 0.14660798013210297, "eval_runtime": 197.4423, "eval_samples_per_second": 29.386, "eval_steps_per_second": 0.461, "eval_wer": 0.20560196328283542, "step": 8000 }, { "epoch": 52.26, "learning_rate": 3.295600000000001e-05, "loss": 0.6151, "step": 8100 }, { "epoch": 52.9, "learning_rate": 3.1436e-05, "loss": 0.6138, "step": 8200 }, { "epoch": 53.55, "learning_rate": 2.9916000000000003e-05, "loss": 0.6066, "step": 8300 }, { "epoch": 54.19, "learning_rate": 2.839600000000001e-05, "loss": 0.6091, "step": 8400 }, { "epoch": 54.83, "learning_rate": 2.687600000000001e-05, "loss": 0.589, "step": 8500 }, { "epoch": 54.83, "eval_cer": 0.03713528835594076, "eval_loss": 0.1351083666086197, "eval_runtime": 196.1992, "eval_samples_per_second": 29.572, "eval_steps_per_second": 0.464, "eval_wer": 0.19427533153994997, "step": 8500 }, { "epoch": 55.48, "learning_rate": 2.5356000000000006e-05, "loss": 0.59, "step": 8600 }, { "epoch": 56.13, "learning_rate": 2.3836000000000007e-05, "loss": 0.5954, "step": 8700 }, { "epoch": 56.77, "learning_rate": 2.2316000000000005e-05, "loss": 0.5886, "step": 8800 }, { "epoch": 57.42, "learning_rate": 2.0796000000000002e-05, "loss": 0.5923, "step": 8900 }, { "epoch": 58.06, "learning_rate": 1.927600000000001e-05, "loss": 0.573, "step": 9000 }, { "epoch": 58.06, "eval_cer": 0.03653794781989255, "eval_loss": 0.13869842886924744, "eval_runtime": 197.6459, "eval_samples_per_second": 29.356, "eval_steps_per_second": 0.46, "eval_wer": 0.19342583415923356, "step": 9000 }, { "epoch": 58.71, "learning_rate": 1.7756000000000008e-05, "loss": 0.5681, "step": 9100 }, { "epoch": 59.35, "learning_rate": 1.623600000000001e-05, "loss": 0.5749, "step": 9200 }, { "epoch": 59.99, "learning_rate": 1.4716000000000006e-05, "loss": 0.5649, "step": 9300 }, { "epoch": 60.64, "learning_rate": 1.3196000000000004e-05, "loss": 0.5649, "step": 9400 }, { "epoch": 61.29, "learning_rate": 1.1676000000000003e-05, "loss": 0.5537, "step": 9500 }, { "epoch": 61.29, "eval_cer": 0.035276483333828025, "eval_loss": 0.132797509431839, "eval_runtime": 197.1347, "eval_samples_per_second": 29.432, "eval_steps_per_second": 0.462, "eval_wer": 0.18825805842654209, "step": 9500 }, { "epoch": 61.93, "learning_rate": 1.015600000000001e-05, "loss": 0.5551, "step": 9600 }, { "epoch": 62.58, "learning_rate": 8.636000000000008e-06, "loss": 0.5563, "step": 9700 }, { "epoch": 63.22, "learning_rate": 7.116000000000008e-06, "loss": 0.5469, "step": 9800 }, { "epoch": 63.87, "learning_rate": 5.596000000000006e-06, "loss": 0.5431, "step": 9900 }, { "epoch": 64.51, "learning_rate": 4.076000000000005e-06, "loss": 0.544, "step": 10000 }, { "epoch": 64.51, "eval_cer": 0.034204238520673176, "eval_loss": 0.12852737307548523, "eval_runtime": 197.3431, "eval_samples_per_second": 29.401, "eval_steps_per_second": 0.461, "eval_wer": 0.18207560526688377, "step": 10000 }, { "epoch": 64.51, "step": 10000, "total_flos": 6.715892353150186e+20, "train_loss": 0.5247637950897217, "train_runtime": 62085.4852, "train_samples_per_second": 20.617, "train_steps_per_second": 0.161 } ], "max_steps": 10000, "num_train_epochs": 65, "total_flos": 6.715892353150186e+20, "trial_name": null, "trial_params": null }