{ "best_metric": null, "best_model_checkpoint": null, "epoch": 11.999599519423308, "global_step": 7488, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.48, "learning_rate": 9.836092715231788e-05, "loss": 7.1097, "step": 300 }, { "epoch": 0.48, "eval_cer": 0.9965361196010141, "eval_loss": 3.3989405632019043, "eval_runtime": 296.7149, "eval_samples_per_second": 12.952, "eval_steps_per_second": 1.621, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.96, "learning_rate": 9.339403973509933e-05, "loss": 3.0235, "step": 600 }, { "epoch": 0.96, "eval_cer": 0.31626033596322883, "eval_loss": 1.3183486461639404, "eval_runtime": 296.8438, "eval_samples_per_second": 12.946, "eval_steps_per_second": 1.62, "eval_wer": 0.7976831955321887, "step": 600 }, { "epoch": 1.44, "learning_rate": 8.84271523178808e-05, "loss": 1.1419, "step": 900 }, { "epoch": 1.44, "eval_cer": 0.19131423697490937, "eval_loss": 0.6415870785713196, "eval_runtime": 297.4006, "eval_samples_per_second": 12.922, "eval_steps_per_second": 1.617, "eval_wer": 0.5543314644734832, "step": 900 }, { "epoch": 1.92, "learning_rate": 8.346026490066225e-05, "loss": 0.8242, "step": 1200 }, { "epoch": 1.92, "eval_cer": 0.16081218755182788, "eval_loss": 0.5063228011131287, "eval_runtime": 296.5772, "eval_samples_per_second": 12.958, "eval_steps_per_second": 1.622, "eval_wer": 0.4804206025436855, "step": 1200 }, { "epoch": 2.56, "learning_rate": 7.683774834437087e-05, "loss": 0.6876, "step": 1600 }, { "epoch": 2.56, "eval_cer": 0.1387068495747151, "eval_loss": 0.44008567929267883, "eval_runtime": 296.3457, "eval_samples_per_second": 12.968, "eval_steps_per_second": 1.623, "eval_wer": 0.42799799297541397, "step": 1600 }, { "epoch": 3.21, "learning_rate": 7.021523178807947e-05, "loss": 0.5868, "step": 2000 }, { "epoch": 3.21, "eval_cer": 0.12487975928163575, "eval_loss": 0.39397352933883667, "eval_runtime": 295.9984, "eval_samples_per_second": 12.983, "eval_steps_per_second": 1.625, "eval_wer": 0.3906716987717882, "step": 2000 }, { "epoch": 3.85, "learning_rate": 6.359271523178809e-05, "loss": 0.5285, "step": 2400 }, { "epoch": 3.85, "eval_cer": 0.1200464377946786, "eval_loss": 0.36609867215156555, "eval_runtime": 296.3002, "eval_samples_per_second": 12.97, "eval_steps_per_second": 1.623, "eval_wer": 0.3762952943999651, "step": 2400 }, { "epoch": 4.49, "learning_rate": 6.433863885839737e-05, "loss": 0.5, "step": 2800 }, { "epoch": 4.49, "eval_cer": 0.11358305494349279, "eval_loss": 0.35275039076805115, "eval_runtime": 294.0242, "eval_samples_per_second": 13.07, "eval_steps_per_second": 1.636, "eval_wer": 0.36098082418900934, "step": 2800 }, { "epoch": 5.13, "learning_rate": 5.885016465422613e-05, "loss": 0.4538, "step": 3200 }, { "epoch": 5.13, "eval_cer": 0.10862653114412302, "eval_loss": 0.34030598402023315, "eval_runtime": 295.0902, "eval_samples_per_second": 13.023, "eval_steps_per_second": 1.63, "eval_wer": 0.34854599794934443, "step": 3200 }, { "epoch": 5.77, "learning_rate": 5.336169045005489e-05, "loss": 0.4165, "step": 3600 }, { "epoch": 5.77, "eval_cer": 0.1061861776482574, "eval_loss": 0.33348363637924194, "eval_runtime": 294.4649, "eval_samples_per_second": 13.051, "eval_steps_per_second": 1.633, "eval_wer": 0.3438774842383124, "step": 3600 }, { "epoch": 6.41, "learning_rate": 4.787321624588365e-05, "loss": 0.3989, "step": 4000 }, { "epoch": 6.41, "eval_cer": 0.1035941905371147, "eval_loss": 0.3264155387878418, "eval_runtime": 294.7693, "eval_samples_per_second": 13.037, "eval_steps_per_second": 1.632, "eval_wer": 0.3339950697004734, "step": 4000 }, { "epoch": 7.05, "learning_rate": 4.2384742041712404e-05, "loss": 0.3679, "step": 4400 }, { "epoch": 7.05, "eval_cer": 0.10127703935366171, "eval_loss": 0.3256165385246277, "eval_runtime": 293.9503, "eval_samples_per_second": 13.074, "eval_steps_per_second": 1.636, "eval_wer": 0.3287375379044045, "step": 4400 }, { "epoch": 7.69, "learning_rate": 3.689626783754117e-05, "loss": 0.3517, "step": 4800 }, { "epoch": 7.69, "eval_cer": 0.10020612694576729, "eval_loss": 0.3212122619152069, "eval_runtime": 293.3063, "eval_samples_per_second": 13.102, "eval_steps_per_second": 1.64, "eval_wer": 0.32230196993826216, "step": 4800 }, { "epoch": 8.33, "learning_rate": 3.142151481888035e-05, "loss": 0.3357, "step": 5200 }, { "epoch": 8.33, "eval_cer": 0.09861871253583529, "eval_loss": 0.317330539226532, "eval_runtime": 293.5785, "eval_samples_per_second": 13.09, "eval_steps_per_second": 1.638, "eval_wer": 0.3196404808132813, "step": 5200 }, { "epoch": 8.97, "learning_rate": 2.5933040614709113e-05, "loss": 0.3225, "step": 5600 }, { "epoch": 8.97, "eval_cer": 0.09852868007676452, "eval_loss": 0.31416377425193787, "eval_runtime": 294.148, "eval_samples_per_second": 13.065, "eval_steps_per_second": 1.635, "eval_wer": 0.31769890268112305, "step": 5600 }, { "epoch": 9.62, "learning_rate": 2.044456641053787e-05, "loss": 0.3057, "step": 6000 }, { "epoch": 9.62, "eval_cer": 0.09754780012794086, "eval_loss": 0.31993794441223145, "eval_runtime": 293.1881, "eval_samples_per_second": 13.108, "eval_steps_per_second": 1.641, "eval_wer": 0.3155828006719169, "step": 6000 }, { "epoch": 10.26, "learning_rate": 1.4956092206366632e-05, "loss": 0.2972, "step": 6400 }, { "epoch": 10.26, "eval_cer": 0.09668064539057503, "eval_loss": 0.3139244616031647, "eval_runtime": 292.3994, "eval_samples_per_second": 13.143, "eval_steps_per_second": 1.645, "eval_wer": 0.3127686031545191, "step": 6400 }, { "epoch": 10.9, "learning_rate": 9.46761800219539e-06, "loss": 0.2881, "step": 6800 }, { "epoch": 10.9, "eval_cer": 0.09570450399222878, "eval_loss": 0.31835824251174927, "eval_runtime": 293.4248, "eval_samples_per_second": 13.097, "eval_steps_per_second": 1.639, "eval_wer": 0.31065250114531295, "step": 6800 }, { "epoch": 11.54, "learning_rate": 3.979143798024149e-06, "loss": 0.2791, "step": 7200 }, { "epoch": 11.54, "eval_cer": 0.09584192195607363, "eval_loss": 0.3184186816215515, "eval_runtime": 293.2834, "eval_samples_per_second": 13.103, "eval_steps_per_second": 1.64, "eval_wer": 0.31036889984511007, "step": 7200 }, { "epoch": 12.0, "step": 7488, "total_flos": 5.018412278085267e+19, "train_loss": 0.24150397533025497, "train_runtime": 25397.5173, "train_samples_per_second": 18.876, "train_steps_per_second": 0.295 } ], "max_steps": 7488, "num_train_epochs": 12, "total_flos": 5.018412278085267e+19, "trial_name": null, "trial_params": null }