{ "best_metric": 0.18266724050045013, "best_model_checkpoint": "./checkpoint-3200", "epoch": 56.14035087719298, "global_step": 3200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.51, "learning_rate": 4.980500000000001e-05, "loss": 4.0311, "step": 200 }, { "epoch": 3.51, "eval_cer": 0.23737986848760748, "eval_loss": 0.794314444065094, "eval_runtime": 15.7, "eval_samples_per_second": 21.338, "eval_steps_per_second": 0.382, "eval_wer": 0.8981264637002342, "step": 200 }, { "epoch": 7.02, "learning_rate": 8e-05, "loss": 1.4388, "step": 400 }, { "epoch": 7.02, "eval_cer": 0.06575619625695499, "eval_loss": 0.2545942962169647, "eval_runtime": 14.891, "eval_samples_per_second": 22.497, "eval_steps_per_second": 0.403, "eval_wer": 0.38212334113973456, "step": 400 }, { "epoch": 10.53, "learning_rate": 8e-05, "loss": 1.0949, "step": 600 }, { "epoch": 10.53, "eval_cer": 0.057309054122407686, "eval_loss": 0.22006691992282867, "eval_runtime": 15.2331, "eval_samples_per_second": 21.992, "eval_steps_per_second": 0.394, "eval_wer": 0.32162373145979706, "step": 600 }, { "epoch": 14.04, "learning_rate": 8e-05, "loss": 1.0279, "step": 800 }, { "epoch": 14.04, "eval_cer": 0.0582701062215478, "eval_loss": 0.22504645586013794, "eval_runtime": 15.1767, "eval_samples_per_second": 22.073, "eval_steps_per_second": 0.395, "eval_wer": 0.32708821233411395, "step": 800 }, { "epoch": 17.54, "learning_rate": 8e-05, "loss": 0.9923, "step": 1000 }, { "epoch": 17.54, "eval_cer": 0.0543247344461305, "eval_loss": 0.2073642760515213, "eval_runtime": 15.1305, "eval_samples_per_second": 22.141, "eval_steps_per_second": 0.397, "eval_wer": 0.3110850897736144, "step": 1000 }, { "epoch": 21.05, "learning_rate": 8e-05, "loss": 0.972, "step": 1200 }, { "epoch": 21.05, "eval_cer": 0.05361659079413252, "eval_loss": 0.21649114787578583, "eval_runtime": 14.9827, "eval_samples_per_second": 22.359, "eval_steps_per_second": 0.4, "eval_wer": 0.29547228727556596, "step": 1200 }, { "epoch": 24.56, "learning_rate": 8e-05, "loss": 0.9587, "step": 1400 }, { "epoch": 24.56, "eval_cer": 0.05351542741527567, "eval_loss": 0.2064175009727478, "eval_runtime": 15.1921, "eval_samples_per_second": 22.051, "eval_steps_per_second": 0.395, "eval_wer": 0.3017174082747853, "step": 1400 }, { "epoch": 28.07, "learning_rate": 8e-05, "loss": 0.9421, "step": 1600 }, { "epoch": 28.07, "eval_cer": 0.051947395042994435, "eval_loss": 0.2061864584684372, "eval_runtime": 15.0418, "eval_samples_per_second": 22.271, "eval_steps_per_second": 0.399, "eval_wer": 0.28844652615144417, "step": 1600 }, { "epoch": 31.58, "learning_rate": 7.059500000000001e-05, "loss": 0.9189, "step": 1800 }, { "epoch": 31.58, "eval_cer": 0.05073343449671219, "eval_loss": 0.2014162391424179, "eval_runtime": 15.125, "eval_samples_per_second": 22.149, "eval_steps_per_second": 0.397, "eval_wer": 0.2822014051522248, "step": 1800 }, { "epoch": 35.09, "learning_rate": 6.109500000000001e-05, "loss": 0.8919, "step": 2000 }, { "epoch": 35.09, "eval_cer": 0.04881133029843197, "eval_loss": 0.19518214464187622, "eval_runtime": 15.0854, "eval_samples_per_second": 22.207, "eval_steps_per_second": 0.398, "eval_wer": 0.2689305230288837, "step": 2000 }, { "epoch": 38.6, "learning_rate": 5.1594999999999996e-05, "loss": 0.8615, "step": 2200 }, { "epoch": 38.6, "eval_cer": 0.04795144157814871, "eval_loss": 0.20196911692619324, "eval_runtime": 15.1604, "eval_samples_per_second": 22.097, "eval_steps_per_second": 0.396, "eval_wer": 0.2685402029664325, "step": 2200 }, { "epoch": 42.11, "learning_rate": 4.2095e-05, "loss": 0.834, "step": 2400 }, { "epoch": 42.11, "eval_cer": 0.04668689934243804, "eval_loss": 0.2001034915447235, "eval_runtime": 15.091, "eval_samples_per_second": 22.199, "eval_steps_per_second": 0.398, "eval_wer": 0.2654176424668228, "step": 2400 }, { "epoch": 45.61, "learning_rate": 3.2595e-05, "loss": 0.8056, "step": 2600 }, { "epoch": 45.61, "eval_cer": 0.04481537683358624, "eval_loss": 0.1934908777475357, "eval_runtime": 15.4173, "eval_samples_per_second": 21.729, "eval_steps_per_second": 0.389, "eval_wer": 0.2498048399687744, "step": 2600 }, { "epoch": 49.12, "learning_rate": 2.3095e-05, "loss": 0.7888, "step": 2800 }, { "epoch": 49.12, "eval_cer": 0.04461305007587253, "eval_loss": 0.18915079534053802, "eval_runtime": 15.29, "eval_samples_per_second": 21.91, "eval_steps_per_second": 0.392, "eval_wer": 0.24512099921935987, "step": 2800 }, { "epoch": 52.63, "learning_rate": 1.3595000000000008e-05, "loss": 0.761, "step": 3000 }, { "epoch": 52.63, "eval_cer": 0.044056651492159836, "eval_loss": 0.18836112320423126, "eval_runtime": 16.204, "eval_samples_per_second": 20.674, "eval_steps_per_second": 0.37, "eval_wer": 0.24316939890710382, "step": 3000 }, { "epoch": 56.14, "learning_rate": 4.095000000000005e-06, "loss": 0.742, "step": 3200 }, { "epoch": 56.14, "eval_cer": 0.04274152756702074, "eval_loss": 0.18266724050045013, "eval_runtime": 15.3463, "eval_samples_per_second": 21.829, "eval_steps_per_second": 0.391, "eval_wer": 0.2388758782201405, "step": 3200 }, { "epoch": 56.14, "step": 3200, "total_flos": 2.6268075931237872e+20, "train_loss": 1.1288447761535645, "train_runtime": 34425.8492, "train_samples_per_second": 11.898, "train_steps_per_second": 0.093 } ], "max_steps": 3200, "num_train_epochs": 57, "total_flos": 2.6268075931237872e+20, "trial_name": null, "trial_params": null }