{ "best_metric": 0.10924588888883591, "best_model_checkpoint": "./checkpoint-12000", "epoch": 38.58520900321543, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.32, "learning_rate": 4.50125e-06, "loss": 8.0884, "step": 100 }, { "epoch": 0.64, "learning_rate": 8.62625e-06, "loss": 3.2246, "step": 200 }, { "epoch": 0.96, "learning_rate": 1.2751250000000001e-05, "loss": 3.1607, "step": 300 }, { "epoch": 1.29, "learning_rate": 1.6876250000000003e-05, "loss": 2.3964, "step": 400 }, { "epoch": 1.61, "learning_rate": 2.100125e-05, "loss": 1.7005, "step": 500 }, { "epoch": 1.61, "eval_cer": 0.11636638864978778, "eval_loss": 0.4082379639148712, "eval_runtime": 199.3377, "eval_samples_per_second": 29.106, "eval_steps_per_second": 0.457, "eval_wer": 0.5583793477747888, "step": 500 }, { "epoch": 1.93, "learning_rate": 2.512625e-05, "loss": 1.4874, "step": 600 }, { "epoch": 2.25, "learning_rate": 2.9251250000000002e-05, "loss": 1.3431, "step": 700 }, { "epoch": 2.57, "learning_rate": 3.3376250000000004e-05, "loss": 1.2316, "step": 800 }, { "epoch": 2.89, "learning_rate": 3.750125e-05, "loss": 1.187, "step": 900 }, { "epoch": 3.22, "learning_rate": 4.162625e-05, "loss": 1.1555, "step": 1000 }, { "epoch": 3.22, "eval_cer": 0.05566026535276483, "eval_loss": 0.2020130306482315, "eval_runtime": 199.2116, "eval_samples_per_second": 29.125, "eval_steps_per_second": 0.457, "eval_wer": 0.29534192269573833, "step": 1000 }, { "epoch": 3.54, "learning_rate": 4.575125e-05, "loss": 1.1286, "step": 1100 }, { "epoch": 3.86, "learning_rate": 4.9876250000000005e-05, "loss": 1.1143, "step": 1200 }, { "epoch": 4.18, "learning_rate": 5e-05, "loss": 1.1067, "step": 1300 }, { "epoch": 4.5, "learning_rate": 5e-05, "loss": 1.0992, "step": 1400 }, { "epoch": 4.82, "learning_rate": 5e-05, "loss": 1.0927, "step": 1500 }, { "epoch": 4.82, "eval_cer": 0.04799130331542548, "eval_loss": 0.1707664430141449, "eval_runtime": 197.8453, "eval_samples_per_second": 29.326, "eval_steps_per_second": 0.46, "eval_wer": 0.25843598093350323, "step": 1500 }, { "epoch": 5.14, "learning_rate": 5e-05, "loss": 1.0907, "step": 1600 }, { "epoch": 5.47, "learning_rate": 5e-05, "loss": 1.0765, "step": 1700 }, { "epoch": 5.79, "learning_rate": 5e-05, "loss": 1.0693, "step": 1800 }, { "epoch": 6.11, "learning_rate": 5e-05, "loss": 1.0547, "step": 1900 }, { "epoch": 6.43, "learning_rate": 5e-05, "loss": 1.0707, "step": 2000 }, { "epoch": 6.43, "eval_cer": 0.04497120892820041, "eval_loss": 0.15630319714546204, "eval_runtime": 197.159, "eval_samples_per_second": 29.428, "eval_steps_per_second": 0.462, "eval_wer": 0.24054934163952996, "step": 2000 }, { "epoch": 6.75, "learning_rate": 5e-05, "loss": 1.0647, "step": 2100 }, { "epoch": 7.07, "learning_rate": 5e-05, "loss": 1.054, "step": 2200 }, { "epoch": 7.4, "learning_rate": 5e-05, "loss": 1.0478, "step": 2300 }, { "epoch": 7.72, "learning_rate": 5e-05, "loss": 1.0611, "step": 2400 }, { "epoch": 8.04, "learning_rate": 5e-05, "loss": 1.0728, "step": 2500 }, { "epoch": 8.04, "eval_cer": 0.04629574663856816, "eval_loss": 0.16203930974006653, "eval_runtime": 196.804, "eval_samples_per_second": 29.481, "eval_steps_per_second": 0.462, "eval_wer": 0.2442304969559677, "step": 2500 }, { "epoch": 8.36, "learning_rate": 5e-05, "loss": 1.0563, "step": 2600 }, { "epoch": 8.68, "learning_rate": 5e-05, "loss": 1.0404, "step": 2700 }, { "epoch": 9.0, "learning_rate": 5e-05, "loss": 1.0791, "step": 2800 }, { "epoch": 9.32, "learning_rate": 5e-05, "loss": 1.0535, "step": 2900 }, { "epoch": 9.65, "learning_rate": 5e-05, "loss": 1.0268, "step": 3000 }, { "epoch": 9.65, "eval_cer": 0.04575776913715829, "eval_loss": 0.15875375270843506, "eval_runtime": 201.4769, "eval_samples_per_second": 28.797, "eval_steps_per_second": 0.452, "eval_wer": 0.2377884751522016, "step": 3000 }, { "epoch": 9.97, "learning_rate": 5e-05, "loss": 1.0322, "step": 3100 }, { "epoch": 10.29, "learning_rate": 5e-05, "loss": 1.0208, "step": 3200 }, { "epoch": 10.61, "learning_rate": 5e-05, "loss": 1.0172, "step": 3300 }, { "epoch": 10.93, "learning_rate": 5e-05, "loss": 1.019, "step": 3400 }, { "epoch": 11.25, "learning_rate": 5e-05, "loss": 1.0328, "step": 3500 }, { "epoch": 11.25, "eval_cer": 0.04419206909857232, "eval_loss": 0.14661966264247894, "eval_runtime": 196.9894, "eval_samples_per_second": 29.453, "eval_steps_per_second": 0.462, "eval_wer": 0.23516919156165936, "step": 3500 }, { "epoch": 11.58, "learning_rate": 5e-05, "loss": 1.0153, "step": 3600 }, { "epoch": 11.9, "learning_rate": 5e-05, "loss": 1.0206, "step": 3700 }, { "epoch": 12.22, "learning_rate": 5e-05, "loss": 1.0168, "step": 3800 }, { "epoch": 12.54, "learning_rate": 5e-05, "loss": 1.0269, "step": 3900 }, { "epoch": 12.86, "learning_rate": 5e-05, "loss": 1.0249, "step": 4000 }, { "epoch": 12.86, "eval_cer": 0.04486361342791843, "eval_loss": 0.15519459545612335, "eval_runtime": 197.5966, "eval_samples_per_second": 29.363, "eval_steps_per_second": 0.461, "eval_wer": 0.23413091698522817, "step": 4000 }, { "epoch": 13.18, "learning_rate": 5e-05, "loss": 1.022, "step": 4100 }, { "epoch": 13.5, "learning_rate": 5e-05, "loss": 1.0219, "step": 4200 }, { "epoch": 13.83, "learning_rate": 5e-05, "loss": 1.0203, "step": 4300 }, { "epoch": 14.15, "learning_rate": 5e-05, "loss": 1.0171, "step": 4400 }, { "epoch": 14.47, "learning_rate": 5e-05, "loss": 1.016, "step": 4500 }, { "epoch": 14.47, "eval_cer": 0.047286367279095305, "eval_loss": 0.16016805171966553, "eval_runtime": 197.4133, "eval_samples_per_second": 29.39, "eval_steps_per_second": 0.461, "eval_wer": 0.2435461796215017, "step": 4500 }, { "epoch": 14.79, "learning_rate": 5e-05, "loss": 1.0233, "step": 4600 }, { "epoch": 15.11, "learning_rate": 5e-05, "loss": 1.0139, "step": 4700 }, { "epoch": 15.43, "learning_rate": 5e-05, "loss": 1.0252, "step": 4800 }, { "epoch": 15.76, "learning_rate": 4.936666666666667e-05, "loss": 1.0305, "step": 4900 }, { "epoch": 16.08, "learning_rate": 4.870694444444445e-05, "loss": 1.0164, "step": 5000 }, { "epoch": 16.08, "eval_cer": 0.044392419340476684, "eval_loss": 0.14910832047462463, "eval_runtime": 205.8325, "eval_samples_per_second": 28.188, "eval_steps_per_second": 0.442, "eval_wer": 0.23372976544433433, "step": 5000 }, { "epoch": 16.4, "learning_rate": 4.804722222222223e-05, "loss": 1.0029, "step": 5100 }, { "epoch": 16.72, "learning_rate": 4.73875e-05, "loss": 0.9924, "step": 5200 }, { "epoch": 17.04, "learning_rate": 4.672777777777778e-05, "loss": 1.0058, "step": 5300 }, { "epoch": 17.36, "learning_rate": 4.606805555555556e-05, "loss": 0.996, "step": 5400 }, { "epoch": 17.68, "learning_rate": 4.540833333333334e-05, "loss": 0.9935, "step": 5500 }, { "epoch": 17.68, "eval_cer": 0.045754058947493396, "eval_loss": 0.15390604734420776, "eval_runtime": 206.7044, "eval_samples_per_second": 28.069, "eval_steps_per_second": 0.44, "eval_wer": 0.23729293501345036, "step": 5500 }, { "epoch": 18.01, "learning_rate": 4.4748611111111116e-05, "loss": 0.9993, "step": 5600 }, { "epoch": 18.33, "learning_rate": 4.408888888888889e-05, "loss": 0.983, "step": 5700 }, { "epoch": 18.65, "learning_rate": 4.342916666666667e-05, "loss": 0.9794, "step": 5800 }, { "epoch": 18.97, "learning_rate": 4.2769444444444447e-05, "loss": 0.9719, "step": 5900 }, { "epoch": 19.29, "learning_rate": 4.2109722222222226e-05, "loss": 0.9626, "step": 6000 }, { "epoch": 19.29, "eval_cer": 0.04342777002760381, "eval_loss": 0.1458132266998291, "eval_runtime": 201.2355, "eval_samples_per_second": 28.832, "eval_steps_per_second": 0.452, "eval_wer": 0.2305441502666478, "step": 6000 }, { "epoch": 19.61, "learning_rate": 4.145e-05, "loss": 0.9542, "step": 6100 }, { "epoch": 19.94, "learning_rate": 4.079027777777778e-05, "loss": 0.978, "step": 6200 }, { "epoch": 20.26, "learning_rate": 4.013055555555556e-05, "loss": 0.9536, "step": 6300 }, { "epoch": 20.58, "learning_rate": 3.9470833333333335e-05, "loss": 0.9627, "step": 6400 }, { "epoch": 20.9, "learning_rate": 3.8811111111111114e-05, "loss": 0.9505, "step": 6500 }, { "epoch": 20.9, "eval_cer": 0.04073046214122466, "eval_loss": 0.13684287667274475, "eval_runtime": 202.0319, "eval_samples_per_second": 28.718, "eval_steps_per_second": 0.45, "eval_wer": 0.21565434895464627, "step": 6500 }, { "epoch": 21.22, "learning_rate": 3.815138888888889e-05, "loss": 0.9395, "step": 6600 }, { "epoch": 21.54, "learning_rate": 3.749166666666667e-05, "loss": 0.9393, "step": 6700 }, { "epoch": 21.86, "learning_rate": 3.6831944444444444e-05, "loss": 0.9541, "step": 6800 }, { "epoch": 22.19, "learning_rate": 3.6172222222222224e-05, "loss": 0.9538, "step": 6900 }, { "epoch": 22.51, "learning_rate": 3.55125e-05, "loss": 0.9389, "step": 7000 }, { "epoch": 22.51, "eval_cer": 0.042626369059986347, "eval_loss": 0.14371351897716522, "eval_runtime": 197.7954, "eval_samples_per_second": 29.333, "eval_steps_per_second": 0.46, "eval_wer": 0.22306385388645053, "step": 7000 }, { "epoch": 22.83, "learning_rate": 3.485277777777778e-05, "loss": 0.9429, "step": 7100 }, { "epoch": 23.15, "learning_rate": 3.419965277777778e-05, "loss": 0.9407, "step": 7200 }, { "epoch": 23.47, "learning_rate": 3.353993055555556e-05, "loss": 0.9224, "step": 7300 }, { "epoch": 23.79, "learning_rate": 3.288020833333334e-05, "loss": 0.9197, "step": 7400 }, { "epoch": 24.12, "learning_rate": 3.2220486111111115e-05, "loss": 0.9129, "step": 7500 }, { "epoch": 24.12, "eval_cer": 0.039372532723872845, "eval_loss": 0.13133755326271057, "eval_runtime": 209.4773, "eval_samples_per_second": 27.698, "eval_steps_per_second": 0.434, "eval_wer": 0.20760772098730473, "step": 7500 }, { "epoch": 24.44, "learning_rate": 3.156076388888889e-05, "loss": 0.9169, "step": 7600 }, { "epoch": 24.76, "learning_rate": 3.090763888888889e-05, "loss": 0.9133, "step": 7700 }, { "epoch": 25.08, "learning_rate": 3.024791666666667e-05, "loss": 0.9068, "step": 7800 }, { "epoch": 25.4, "learning_rate": 2.958819444444445e-05, "loss": 0.9137, "step": 7900 }, { "epoch": 25.72, "learning_rate": 2.8928472222222224e-05, "loss": 0.9118, "step": 8000 }, { "epoch": 25.72, "eval_cer": 0.03844869549731382, "eval_loss": 0.12918178737163544, "eval_runtime": 197.6149, "eval_samples_per_second": 29.36, "eval_steps_per_second": 0.46, "eval_wer": 0.2040445514181887, "step": 8000 }, { "epoch": 26.05, "learning_rate": 2.826875e-05, "loss": 0.9057, "step": 8100 }, { "epoch": 26.37, "learning_rate": 2.7609027777777785e-05, "loss": 0.8956, "step": 8200 }, { "epoch": 26.69, "learning_rate": 2.694930555555556e-05, "loss": 0.9088, "step": 8300 }, { "epoch": 27.01, "learning_rate": 2.6289583333333333e-05, "loss": 0.8997, "step": 8400 }, { "epoch": 27.33, "learning_rate": 2.5629861111111116e-05, "loss": 0.8848, "step": 8500 }, { "epoch": 27.33, "eval_cer": 0.03840788341099997, "eval_loss": 0.1298777312040329, "eval_runtime": 197.318, "eval_samples_per_second": 29.404, "eval_steps_per_second": 0.461, "eval_wer": 0.20281749964604276, "step": 8500 }, { "epoch": 27.65, "learning_rate": 2.4970138888888895e-05, "loss": 0.8926, "step": 8600 }, { "epoch": 27.97, "learning_rate": 2.431041666666667e-05, "loss": 0.8802, "step": 8700 }, { "epoch": 28.3, "learning_rate": 2.365069444444445e-05, "loss": 0.8784, "step": 8800 }, { "epoch": 28.62, "learning_rate": 2.2990972222222225e-05, "loss": 0.8749, "step": 8900 }, { "epoch": 28.94, "learning_rate": 2.2331250000000004e-05, "loss": 0.8667, "step": 9000 }, { "epoch": 28.94, "eval_cer": 0.03673829806179692, "eval_loss": 0.12283530086278915, "eval_runtime": 199.3855, "eval_samples_per_second": 29.099, "eval_steps_per_second": 0.456, "eval_wer": 0.1945113030345934, "step": 9000 }, { "epoch": 29.26, "learning_rate": 2.1671527777777783e-05, "loss": 0.8628, "step": 9100 }, { "epoch": 29.58, "learning_rate": 2.101180555555556e-05, "loss": 0.8775, "step": 9200 }, { "epoch": 29.9, "learning_rate": 2.0352083333333338e-05, "loss": 0.8661, "step": 9300 }, { "epoch": 30.23, "learning_rate": 1.9692361111111114e-05, "loss": 0.8624, "step": 9400 }, { "epoch": 30.55, "learning_rate": 1.9032638888888893e-05, "loss": 0.8641, "step": 9500 }, { "epoch": 30.55, "eval_cer": 0.036352438336647766, "eval_loss": 0.12234856933355331, "eval_runtime": 202.2537, "eval_samples_per_second": 28.687, "eval_steps_per_second": 0.45, "eval_wer": 0.19385058284959178, "step": 9500 }, { "epoch": 30.87, "learning_rate": 1.837291666666667e-05, "loss": 0.8637, "step": 9600 }, { "epoch": 31.19, "learning_rate": 1.7713194444444447e-05, "loss": 0.8608, "step": 9700 }, { "epoch": 31.51, "learning_rate": 1.7053472222222226e-05, "loss": 0.8556, "step": 9800 }, { "epoch": 31.83, "learning_rate": 1.6393750000000002e-05, "loss": 0.854, "step": 9900 }, { "epoch": 32.15, "learning_rate": 1.573402777777778e-05, "loss": 0.8516, "step": 10000 }, { "epoch": 32.15, "eval_cer": 0.03494627645365231, "eval_loss": 0.11841931194067001, "eval_runtime": 199.2371, "eval_samples_per_second": 29.121, "eval_steps_per_second": 0.457, "eval_wer": 0.18762093539100477, "step": 10000 }, { "epoch": 32.48, "learning_rate": 1.5074305555555557e-05, "loss": 0.8433, "step": 10100 }, { "epoch": 32.8, "learning_rate": 1.4414583333333338e-05, "loss": 0.8507, "step": 10200 }, { "epoch": 33.12, "learning_rate": 1.3754861111111117e-05, "loss": 0.8419, "step": 10300 }, { "epoch": 33.44, "learning_rate": 1.3095138888888892e-05, "loss": 0.8344, "step": 10400 }, { "epoch": 33.76, "learning_rate": 1.2435416666666671e-05, "loss": 0.8379, "step": 10500 }, { "epoch": 33.76, "eval_cer": 0.03375159538155591, "eval_loss": 0.11372008919715881, "eval_runtime": 199.4785, "eval_samples_per_second": 29.086, "eval_steps_per_second": 0.456, "eval_wer": 0.18207560526688377, "step": 10500 }, { "epoch": 34.08, "learning_rate": 1.1782291666666672e-05, "loss": 0.8302, "step": 10600 }, { "epoch": 34.41, "learning_rate": 1.1122569444444448e-05, "loss": 0.8294, "step": 10700 }, { "epoch": 34.73, "learning_rate": 1.0462847222222227e-05, "loss": 0.8225, "step": 10800 }, { "epoch": 35.05, "learning_rate": 9.803125000000001e-06, "loss": 0.8237, "step": 10900 }, { "epoch": 35.37, "learning_rate": 9.143402777777782e-06, "loss": 0.8235, "step": 11000 }, { "epoch": 35.37, "eval_cer": 0.03308005105220979, "eval_loss": 0.11269930005073547, "eval_runtime": 198.8276, "eval_samples_per_second": 29.181, "eval_steps_per_second": 0.458, "eval_wer": 0.1778753126622304, "step": 11000 }, { "epoch": 35.69, "learning_rate": 8.483680555555563e-06, "loss": 0.8205, "step": 11100 }, { "epoch": 36.01, "learning_rate": 7.823958333333337e-06, "loss": 0.826, "step": 11200 }, { "epoch": 36.33, "learning_rate": 7.1642361111111165e-06, "loss": 0.8207, "step": 11300 }, { "epoch": 36.66, "learning_rate": 6.504513888888891e-06, "loss": 0.8129, "step": 11400 }, { "epoch": 36.98, "learning_rate": 5.844791666666671e-06, "loss": 0.8112, "step": 11500 }, { "epoch": 36.98, "eval_cer": 0.03268677094773085, "eval_loss": 0.11033473163843155, "eval_runtime": 201.8103, "eval_samples_per_second": 28.75, "eval_steps_per_second": 0.451, "eval_wer": 0.17662466374062014, "step": 11500 }, { "epoch": 37.3, "learning_rate": 5.185069444444451e-06, "loss": 0.805, "step": 11600 }, { "epoch": 37.62, "learning_rate": 4.525347222222226e-06, "loss": 0.8108, "step": 11700 }, { "epoch": 37.94, "learning_rate": 3.865625000000006e-06, "loss": 0.8025, "step": 11800 }, { "epoch": 38.26, "learning_rate": 3.2059027777777807e-06, "loss": 0.8018, "step": 11900 }, { "epoch": 38.59, "learning_rate": 2.5461805555555606e-06, "loss": 0.8069, "step": 12000 }, { "epoch": 38.59, "eval_cer": 0.032260099136267845, "eval_loss": 0.10924588888883591, "eval_runtime": 199.7196, "eval_samples_per_second": 29.051, "eval_steps_per_second": 0.456, "eval_wer": 0.17520883477275945, "step": 12000 }, { "epoch": 38.59, "step": 12000, "total_flos": 1.0363087195555613e+21, "train_loss": 1.0786900800069172, "train_runtime": 133237.4383, "train_samples_per_second": 11.528, "train_steps_per_second": 0.09 } ], "max_steps": 12000, "num_train_epochs": 39, "total_flos": 1.0363087195555613e+21, "trial_name": null, "trial_params": null }