{ "best_metric": 0.2248678207397461, "best_model_checkpoint": "./checkpoint-1600", "epoch": 61.53333333333333, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.84, "learning_rate": 4.980500000000001e-05, "loss": 4.9923, "step": 100 }, { "epoch": 3.84, "eval_cer": 1.0, "eval_loss": 3.156188488006592, "eval_runtime": 15.7049, "eval_samples_per_second": 21.331, "eval_steps_per_second": 0.382, "eval_wer": 1.0, "step": 100 }, { "epoch": 7.69, "learning_rate": 8e-05, "loss": 2.1775, "step": 200 }, { "epoch": 7.69, "eval_cer": 0.11219018715225089, "eval_loss": 0.43336454033851624, "eval_runtime": 15.2789, "eval_samples_per_second": 21.926, "eval_steps_per_second": 0.393, "eval_wer": 0.5804059328649492, "step": 200 }, { "epoch": 11.53, "learning_rate": 8e-05, "loss": 1.3708, "step": 300 }, { "epoch": 11.53, "eval_cer": 0.07966616084977238, "eval_loss": 0.3105751574039459, "eval_runtime": 15.048, "eval_samples_per_second": 22.262, "eval_steps_per_second": 0.399, "eval_wer": 0.4336455893832943, "step": 300 }, { "epoch": 15.38, "learning_rate": 8e-05, "loss": 1.2266, "step": 400 }, { "epoch": 15.38, "eval_cer": 0.06732422862923622, "eval_loss": 0.26751142740249634, "eval_runtime": 15.232, "eval_samples_per_second": 21.993, "eval_steps_per_second": 0.394, "eval_wer": 0.3672911787665886, "step": 400 }, { "epoch": 19.23, "learning_rate": 8e-05, "loss": 1.093, "step": 500 }, { "epoch": 19.23, "eval_cer": 0.06327769347496207, "eval_loss": 0.24162611365318298, "eval_runtime": 14.995, "eval_samples_per_second": 22.341, "eval_steps_per_second": 0.4, "eval_wer": 0.35011709601873536, "step": 500 }, { "epoch": 23.08, "learning_rate": 8e-05, "loss": 0.989, "step": 600 }, { "epoch": 23.08, "eval_cer": 0.06105209914011128, "eval_loss": 0.23200440406799316, "eval_runtime": 15.3525, "eval_samples_per_second": 21.821, "eval_steps_per_second": 0.391, "eval_wer": 0.3251366120218579, "step": 600 }, { "epoch": 26.91, "learning_rate": 8e-05, "loss": 0.9518, "step": 700 }, { "epoch": 26.91, "eval_cer": 0.05842185128983308, "eval_loss": 0.2413272261619568, "eval_runtime": 15.2385, "eval_samples_per_second": 21.984, "eval_steps_per_second": 0.394, "eval_wer": 0.3192818110850898, "step": 700 }, { "epoch": 30.76, "learning_rate": 8e-05, "loss": 0.9075, "step": 800 }, { "epoch": 30.76, "eval_cer": 0.05933232169954476, "eval_loss": 0.23544833064079285, "eval_runtime": 15.1938, "eval_samples_per_second": 22.049, "eval_steps_per_second": 0.395, "eval_wer": 0.3200624512099922, "step": 800 }, { "epoch": 34.61, "learning_rate": 7.059500000000001e-05, "loss": 0.878, "step": 900 }, { "epoch": 34.61, "eval_cer": 0.057916034395548814, "eval_loss": 0.22777308523654938, "eval_runtime": 14.9728, "eval_samples_per_second": 22.374, "eval_steps_per_second": 0.401, "eval_wer": 0.3126463700234192, "step": 900 }, { "epoch": 38.46, "learning_rate": 6.109500000000001e-05, "loss": 0.8563, "step": 1000 }, { "epoch": 38.46, "eval_cer": 0.054779969650986346, "eval_loss": 0.2326740324497223, "eval_runtime": 15.1749, "eval_samples_per_second": 22.076, "eval_steps_per_second": 0.395, "eval_wer": 0.2962529274004684, "step": 1000 }, { "epoch": 42.3, "learning_rate": 5.169000000000001e-05, "loss": 0.8084, "step": 1100 }, { "epoch": 42.3, "eval_cer": 0.05407182599898837, "eval_loss": 0.22712552547454834, "eval_runtime": 15.3083, "eval_samples_per_second": 21.884, "eval_steps_per_second": 0.392, "eval_wer": 0.2923497267759563, "step": 1100 }, { "epoch": 46.15, "learning_rate": 4.219000000000001e-05, "loss": 0.7845, "step": 1200 }, { "epoch": 46.15, "eval_cer": 0.053667172483560954, "eval_loss": 0.23326420783996582, "eval_runtime": 15.1559, "eval_samples_per_second": 22.104, "eval_steps_per_second": 0.396, "eval_wer": 0.29508196721311475, "step": 1200 }, { "epoch": 49.99, "learning_rate": 3.269000000000001e-05, "loss": 0.7487, "step": 1300 }, { "epoch": 49.99, "eval_cer": 0.052453211937278706, "eval_loss": 0.22895006835460663, "eval_runtime": 15.373, "eval_samples_per_second": 21.791, "eval_steps_per_second": 0.39, "eval_wer": 0.2888368462138954, "step": 1300 }, { "epoch": 53.84, "learning_rate": 2.319e-05, "loss": 0.7182, "step": 1400 }, { "epoch": 53.84, "eval_cer": 0.05346484572584724, "eval_loss": 0.23406584560871124, "eval_runtime": 15.2056, "eval_samples_per_second": 22.031, "eval_steps_per_second": 0.395, "eval_wer": 0.28766588602654175, "step": 1400 }, { "epoch": 57.69, "learning_rate": 1.369e-05, "loss": 0.7095, "step": 1500 }, { "epoch": 57.69, "eval_cer": 0.05154274152756702, "eval_loss": 0.22908572852611542, "eval_runtime": 15.2684, "eval_samples_per_second": 21.941, "eval_steps_per_second": 0.393, "eval_wer": 0.2818110850897736, "step": 1500 }, { "epoch": 61.53, "learning_rate": 4.190000000000005e-06, "loss": 0.6953, "step": 1600 }, { "epoch": 61.53, "eval_cer": 0.05078401618614062, "eval_loss": 0.2248678207397461, "eval_runtime": 15.2139, "eval_samples_per_second": 22.019, "eval_steps_per_second": 0.394, "eval_wer": 0.2782982045277127, "step": 1600 }, { "epoch": 61.53, "step": 1600, "total_flos": 1.3126730002882698e+20, "train_loss": 1.2442097234725953, "train_runtime": 17605.3989, "train_samples_per_second": 11.633, "train_steps_per_second": 0.091 } ], "max_steps": 1600, "num_train_epochs": 62, "total_flos": 1.3126730002882698e+20, "trial_name": null, "trial_params": null }