{ "best_metric": 7.581081081081082, "best_model_checkpoint": "./checkpoint-9000", "epoch": 5.48847420417124, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27, "learning_rate": 9.9e-06, "loss": 0.365, "step": 500 }, { "epoch": 0.55, "learning_rate": 9.478947368421053e-06, "loss": 0.1912, "step": 1000 }, { "epoch": 0.55, "eval_cer": 7.035654676637185, "eval_loss": 0.18279600143432617, "eval_runtime": 7736.1561, "eval_samples_per_second": 0.595, "eval_steps_per_second": 0.074, "eval_wer": 11.23141891891892, "step": 1000 }, { "epoch": 0.82, "learning_rate": 8.95263157894737e-06, "loss": 0.1649, "step": 1500 }, { "epoch": 1.1, "learning_rate": 8.426315789473684e-06, "loss": 0.1329, "step": 2000 }, { "epoch": 1.1, "eval_cer": 5.9028005940255746, "eval_loss": 0.16182316839694977, "eval_runtime": 7650.4506, "eval_samples_per_second": 0.602, "eval_steps_per_second": 0.075, "eval_wer": 9.41722972972973, "step": 2000 }, { "epoch": 1.37, "learning_rate": 7.9e-06, "loss": 0.0954, "step": 2500 }, { "epoch": 1.65, "learning_rate": 7.373684210526316e-06, "loss": 0.0912, "step": 3000 }, { "epoch": 1.65, "eval_cer": 5.47108067971245, "eval_loss": 0.16157789528369904, "eval_runtime": 7517.9849, "eval_samples_per_second": 0.612, "eval_steps_per_second": 0.077, "eval_wer": 8.925675675675675, "step": 3000 }, { "epoch": 1.92, "learning_rate": 6.8473684210526325e-06, "loss": 0.0887, "step": 3500 }, { "epoch": 2.2, "learning_rate": 6.321052631578948e-06, "loss": 0.0576, "step": 4000 }, { "epoch": 2.2, "eval_cer": 5.305489479701936, "eval_loss": 0.1663784682750702, "eval_runtime": 7600.817, "eval_samples_per_second": 0.606, "eval_steps_per_second": 0.076, "eval_wer": 8.58614864864865, "step": 4000 }, { "epoch": 2.47, "learning_rate": 5.794736842105264e-06, "loss": 0.047, "step": 4500 }, { "epoch": 2.74, "learning_rate": 5.269473684210527e-06, "loss": 0.0449, "step": 5000 }, { "epoch": 2.74, "eval_cer": 5.2930044289074925, "eval_loss": 0.16421984136104584, "eval_runtime": 7476.5946, "eval_samples_per_second": 0.616, "eval_steps_per_second": 0.077, "eval_wer": 8.451013513513514, "step": 5000 }, { "epoch": 3.02, "learning_rate": 4.743157894736842e-06, "loss": 0.0431, "step": 5500 }, { "epoch": 3.29, "learning_rate": 4.217894736842106e-06, "loss": 0.02, "step": 6000 }, { "epoch": 3.29, "eval_cer": 5.035418117780027, "eval_loss": 0.17986369132995605, "eval_runtime": 7470.6199, "eval_samples_per_second": 0.616, "eval_steps_per_second": 0.077, "eval_wer": 8.153716216216218, "step": 6000 }, { "epoch": 3.57, "learning_rate": 3.6915789473684216e-06, "loss": 0.0207, "step": 6500 }, { "epoch": 3.84, "learning_rate": 3.1652631578947375e-06, "loss": 0.019, "step": 7000 }, { "epoch": 3.84, "eval_cer": 5.082729889211602, "eval_loss": 0.180050328373909, "eval_runtime": 7452.1074, "eval_samples_per_second": 0.618, "eval_steps_per_second": 0.077, "eval_wer": 8.125, "step": 7000 }, { "epoch": 4.12, "learning_rate": 2.6389473684210526e-06, "loss": 0.014, "step": 7500 }, { "epoch": 4.39, "learning_rate": 2.1126315789473685e-06, "loss": 0.0067, "step": 8000 }, { "epoch": 4.39, "eval_cer": 4.8133156352262425, "eval_loss": 0.20030897855758667, "eval_runtime": 7521.4475, "eval_samples_per_second": 0.612, "eval_steps_per_second": 0.077, "eval_wer": 7.841216216216217, "step": 8000 }, { "epoch": 4.67, "learning_rate": 1.5863157894736845e-06, "loss": 0.0062, "step": 8500 }, { "epoch": 4.94, "learning_rate": 1.06e-06, "loss": 0.006, "step": 9000 }, { "epoch": 4.94, "eval_cer": 4.70226439394935, "eval_loss": 0.2070685774087906, "eval_runtime": 7515.5986, "eval_samples_per_second": 0.613, "eval_steps_per_second": 0.077, "eval_wer": 7.581081081081082, "step": 9000 }, { "epoch": 5.21, "learning_rate": 5.336842105263159e-07, "loss": 0.0029, "step": 9500 }, { "epoch": 5.49, "learning_rate": 7.3684210526315796e-09, "loss": 0.0022, "step": 10000 }, { "epoch": 5.49, "eval_cer": 4.718692092363091, "eval_loss": 0.2283746749162674, "eval_runtime": 7466.269, "eval_samples_per_second": 0.617, "eval_steps_per_second": 0.077, "eval_wer": 7.64527027027027, "step": 10000 }, { "epoch": 5.49, "step": 10000, "total_flos": 3.39643668197376e+20, "train_loss": 0.07097679934501648, "train_runtime": 114325.2073, "train_samples_per_second": 1.4, "train_steps_per_second": 0.087 } ], "max_steps": 10000, "num_train_epochs": 6, "total_flos": 3.39643668197376e+20, "trial_name": null, "trial_params": null }