{ "best_metric": 0.22928521037101746, "best_model_checkpoint": "./checkpoint-4000", "epoch": 70.17543859649123, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.75, "learning_rate": 1.7851749999999994e-05, "loss": 9.5756, "step": 100 }, { "epoch": 3.51, "learning_rate": 3.517674999999999e-05, "loss": 4.1914, "step": 200 }, { "epoch": 5.26, "learning_rate": 5.250174999999999e-05, "loss": 3.3316, "step": 300 }, { "epoch": 7.02, "learning_rate": 6.982674999999999e-05, "loss": 3.1471, "step": 400 }, { "epoch": 7.02, "eval_cer": 1.0, "eval_loss": 3.1599209308624268, "eval_runtime": 8.4621, "eval_samples_per_second": 39.588, "eval_steps_per_second": 0.709, "eval_wer": 1.0, "step": 400 }, { "epoch": 8.77, "learning_rate": 7e-05, "loss": 3.0957, "step": 500 }, { "epoch": 10.53, "learning_rate": 7e-05, "loss": 2.8518, "step": 600 }, { "epoch": 12.28, "learning_rate": 7e-05, "loss": 2.3182, "step": 700 }, { "epoch": 14.04, "learning_rate": 7e-05, "loss": 1.8691, "step": 800 }, { "epoch": 14.04, "eval_cer": 0.1685887708649469, "eval_loss": 0.767376184463501, "eval_runtime": 8.3651, "eval_samples_per_second": 40.048, "eval_steps_per_second": 0.717, "eval_wer": 0.7361436377829821, "step": 800 }, { "epoch": 15.79, "learning_rate": 7e-05, "loss": 1.6182, "step": 900 }, { "epoch": 17.54, "learning_rate": 7e-05, "loss": 1.4813, "step": 1000 }, { "epoch": 19.3, "learning_rate": 7e-05, "loss": 1.4096, "step": 1100 }, { "epoch": 21.05, "learning_rate": 7e-05, "loss": 1.3227, "step": 1200 }, { "epoch": 21.05, "eval_cer": 0.10065756196256954, "eval_loss": 0.38490724563598633, "eval_runtime": 8.5046, "eval_samples_per_second": 39.39, "eval_steps_per_second": 0.705, "eval_wer": 0.5335675253708041, "step": 1200 }, { "epoch": 22.81, "learning_rate": 7e-05, "loss": 1.2765, "step": 1300 }, { "epoch": 24.56, "learning_rate": 7e-05, "loss": 1.2296, "step": 1400 }, { "epoch": 26.32, "learning_rate": 7e-05, "loss": 1.1942, "step": 1500 }, { "epoch": 28.07, "learning_rate": 7e-05, "loss": 1.163, "step": 1600 }, { "epoch": 28.07, "eval_cer": 0.08229640870005059, "eval_loss": 0.30146270990371704, "eval_runtime": 8.4606, "eval_samples_per_second": 39.595, "eval_steps_per_second": 0.709, "eval_wer": 0.4558938329430133, "step": 1600 }, { "epoch": 29.82, "learning_rate": 7e-05, "loss": 1.1351, "step": 1700 }, { "epoch": 31.58, "learning_rate": 7e-05, "loss": 1.1201, "step": 1800 }, { "epoch": 33.33, "learning_rate": 7e-05, "loss": 1.1014, "step": 1900 }, { "epoch": 35.09, "learning_rate": 7e-05, "loss": 1.0768, "step": 2000 }, { "epoch": 35.09, "eval_cer": 0.07278705108750633, "eval_loss": 0.27209824323654175, "eval_runtime": 8.2339, "eval_samples_per_second": 40.685, "eval_steps_per_second": 0.729, "eval_wer": 0.4032006245120999, "step": 2000 }, { "epoch": 36.84, "learning_rate": 6.670824999999999e-05, "loss": 1.063, "step": 2100 }, { "epoch": 38.6, "learning_rate": 6.338324999999999e-05, "loss": 1.0538, "step": 2200 }, { "epoch": 40.35, "learning_rate": 6.005824999999999e-05, "loss": 1.0439, "step": 2300 }, { "epoch": 42.11, "learning_rate": 5.673325e-05, "loss": 1.0224, "step": 2400 }, { "epoch": 42.11, "eval_cer": 0.06914516944865959, "eval_loss": 0.2586216628551483, "eval_runtime": 8.3142, "eval_samples_per_second": 40.292, "eval_steps_per_second": 0.722, "eval_wer": 0.3825136612021858, "step": 2400 }, { "epoch": 43.86, "learning_rate": 5.340824999999999e-05, "loss": 1.0116, "step": 2500 }, { "epoch": 45.61, "learning_rate": 5.008325e-05, "loss": 0.9925, "step": 2600 }, { "epoch": 47.37, "learning_rate": 4.675824999999999e-05, "loss": 0.9877, "step": 2700 }, { "epoch": 49.12, "learning_rate": 4.343324999999999e-05, "loss": 0.9817, "step": 2800 }, { "epoch": 49.12, "eval_cer": 0.06525037936267071, "eval_loss": 0.24575529992580414, "eval_runtime": 8.5545, "eval_samples_per_second": 39.16, "eval_steps_per_second": 0.701, "eval_wer": 0.36533957845433257, "step": 2800 }, { "epoch": 50.88, "learning_rate": 4.010825e-05, "loss": 0.9781, "step": 2900 }, { "epoch": 52.63, "learning_rate": 3.6816499999999996e-05, "loss": 0.9625, "step": 3000 }, { "epoch": 54.39, "learning_rate": 3.34915e-05, "loss": 0.9499, "step": 3100 }, { "epoch": 56.14, "learning_rate": 3.01665e-05, "loss": 0.941, "step": 3200 }, { "epoch": 56.14, "eval_cer": 0.06054628224582701, "eval_loss": 0.23060913383960724, "eval_runtime": 8.3111, "eval_samples_per_second": 40.307, "eval_steps_per_second": 0.722, "eval_wer": 0.33879781420765026, "step": 3200 }, { "epoch": 57.89, "learning_rate": 2.6841499999999994e-05, "loss": 0.9375, "step": 3300 }, { "epoch": 59.65, "learning_rate": 2.3516499999999998e-05, "loss": 0.9297, "step": 3400 }, { "epoch": 61.4, "learning_rate": 2.0191499999999998e-05, "loss": 0.9218, "step": 3500 }, { "epoch": 63.16, "learning_rate": 1.68665e-05, "loss": 0.9235, "step": 3600 }, { "epoch": 63.16, "eval_cer": 0.06150733434496712, "eval_loss": 0.23152786493301392, "eval_runtime": 8.3332, "eval_samples_per_second": 40.201, "eval_steps_per_second": 0.72, "eval_wer": 0.33801717408274784, "step": 3600 }, { "epoch": 64.91, "learning_rate": 1.3541499999999999e-05, "loss": 0.9173, "step": 3700 }, { "epoch": 66.67, "learning_rate": 1.0216500000000001e-05, "loss": 0.9136, "step": 3800 }, { "epoch": 68.42, "learning_rate": 6.891500000000002e-06, "loss": 0.9107, "step": 3900 }, { "epoch": 70.18, "learning_rate": 3.566500000000003e-06, "loss": 0.9141, "step": 4000 }, { "epoch": 70.18, "eval_cer": 0.06019221041982802, "eval_loss": 0.22928521037101746, "eval_runtime": 8.619, "eval_samples_per_second": 38.868, "eval_steps_per_second": 0.696, "eval_wer": 0.3333333333333333, "step": 4000 }, { "epoch": 70.18, "step": 4000, "total_flos": 1.1026564721970925e+20, "train_loss": 1.6216355571746826, "train_runtime": 16397.7621, "train_samples_per_second": 31.224, "train_steps_per_second": 0.244 } ], "max_steps": 4000, "num_train_epochs": 71, "total_flos": 1.1026564721970925e+20, "trial_name": null, "trial_params": null }