{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.909466234149218, "global_step": 6700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "eval_cer": 0.2550782058484374, "eval_loss": 0.9434235095977783, "eval_runtime": 65.8936, "eval_samples_per_second": 7.588, "eval_steps_per_second": 0.956, "eval_wer": 0.8986276613768566, "step": 335 }, { "epoch": 0.47, "learning_rate": 0.00023999999999999998, "loss": 3.5921, "step": 400 }, { "epoch": 0.79, "eval_cer": 0.05617811418940895, "eval_loss": 0.2126482129096985, "eval_runtime": 65.2511, "eval_samples_per_second": 7.663, "eval_steps_per_second": 0.966, "eval_wer": 0.20076805783023663, "step": 670 }, { "epoch": 0.94, "learning_rate": 0.00028565965583173995, "loss": 0.3347, "step": 800 }, { "epoch": 1.19, "eval_cer": 0.052689157623961445, "eval_loss": 0.19026830792427063, "eval_runtime": 66.2853, "eval_samples_per_second": 7.543, "eval_steps_per_second": 0.95, "eval_wer": 0.18907776585531147, "step": 1005 }, { "epoch": 1.42, "learning_rate": 0.0002665391969407266, "loss": 0.1948, "step": 1200 }, { "epoch": 1.58, "eval_cer": 0.04130569764347595, "eval_loss": 0.1462544947862625, "eval_runtime": 65.903, "eval_samples_per_second": 7.587, "eval_steps_per_second": 0.956, "eval_wer": 0.13915400688993054, "step": 1340 }, { "epoch": 1.89, "learning_rate": 0.00024741873804971315, "loss": 0.1737, "step": 1600 }, { "epoch": 1.98, "eval_cer": 0.040093433074125544, "eval_loss": 0.1409013271331787, "eval_runtime": 66.3053, "eval_samples_per_second": 7.541, "eval_steps_per_second": 0.95, "eval_wer": 0.13599141582425028, "step": 1675 }, { "epoch": 2.36, "learning_rate": 0.00022829827915869978, "loss": 0.1466, "step": 2000 }, { "epoch": 2.37, "eval_cer": 0.03887131269526822, "eval_loss": 0.1429334431886673, "eval_runtime": 65.9993, "eval_samples_per_second": 7.576, "eval_steps_per_second": 0.955, "eval_wer": 0.13401479640820016, "step": 2010 }, { "epoch": 2.77, "eval_cer": 0.03858549421956772, "eval_loss": 0.1421019285917282, "eval_runtime": 66.1016, "eval_samples_per_second": 7.564, "eval_steps_per_second": 0.953, "eval_wer": 0.12678601682950247, "step": 2345 }, { "epoch": 2.83, "learning_rate": 0.0002091778202676864, "loss": 0.1378, "step": 2400 }, { "epoch": 3.16, "eval_cer": 0.03703813212698225, "eval_loss": 0.1354704052209854, "eval_runtime": 65.9839, "eval_samples_per_second": 7.578, "eval_steps_per_second": 0.955, "eval_wer": 0.12305867735923647, "step": 2680 }, { "epoch": 3.31, "learning_rate": 0.00019005736137667304, "loss": 0.1217, "step": 2800 }, { "epoch": 3.56, "eval_cer": 0.034948700511516516, "eval_loss": 0.13004331290721893, "eval_runtime": 66.7534, "eval_samples_per_second": 7.49, "eval_steps_per_second": 0.944, "eval_wer": 0.11176370926752131, "step": 3015 }, { "epoch": 3.78, "learning_rate": 0.00017093690248565967, "loss": 0.1121, "step": 3200 }, { "epoch": 3.95, "eval_cer": 0.03565831879601431, "eval_loss": 0.1295933872461319, "eval_runtime": 67.056, "eval_samples_per_second": 7.456, "eval_steps_per_second": 0.94, "eval_wer": 0.11729824363246173, "step": 3350 }, { "epoch": 4.25, "learning_rate": 0.00015181644359464624, "loss": 0.1038, "step": 3600 }, { "epoch": 4.35, "eval_cer": 0.034948700511516516, "eval_loss": 0.13280533254146576, "eval_runtime": 66.5598, "eval_samples_per_second": 7.512, "eval_steps_per_second": 0.947, "eval_wer": 0.11080363697972553, "step": 3685 }, { "epoch": 4.72, "learning_rate": 0.00013269598470363287, "loss": 0.0941, "step": 4000 }, { "epoch": 4.75, "eval_cer": 0.035214807368203184, "eval_loss": 0.12650151550769806, "eval_runtime": 66.4767, "eval_samples_per_second": 7.521, "eval_steps_per_second": 0.948, "eval_wer": 0.1124978821934828, "step": 4020 }, { "epoch": 5.14, "eval_cer": 0.034012398608359695, "eval_loss": 0.1327013224363327, "eval_runtime": 66.3907, "eval_samples_per_second": 7.531, "eval_steps_per_second": 0.949, "eval_wer": 0.10718924719037669, "step": 4355 }, { "epoch": 5.19, "learning_rate": 0.0001135755258126195, "loss": 0.0862, "step": 4400 }, { "epoch": 5.54, "eval_cer": 0.0352246631777101, "eval_loss": 0.1414576768875122, "eval_runtime": 66.6897, "eval_samples_per_second": 7.497, "eval_steps_per_second": 0.945, "eval_wer": 0.11294968091715141, "step": 4690 }, { "epoch": 5.67, "learning_rate": 9.44550669216061e-05, "loss": 0.0748, "step": 4800 }, { "epoch": 5.93, "eval_cer": 0.03466288203581601, "eval_loss": 0.13079801201820374, "eval_runtime": 66.6475, "eval_samples_per_second": 7.502, "eval_steps_per_second": 0.945, "eval_wer": 0.1101824137346812, "step": 5025 }, { "epoch": 6.14, "learning_rate": 7.533460803059272e-05, "loss": 0.0727, "step": 5200 }, { "epoch": 6.33, "eval_cer": 0.03377585918019377, "eval_loss": 0.1386057287454605, "eval_runtime": 66.5005, "eval_samples_per_second": 7.519, "eval_steps_per_second": 0.947, "eval_wer": 0.10470435421019936, "step": 5360 }, { "epoch": 6.61, "learning_rate": 5.621414913957934e-05, "loss": 0.0622, "step": 5600 }, { "epoch": 6.72, "eval_cer": 0.0333717709904103, "eval_loss": 0.14488892257213593, "eval_runtime": 66.3875, "eval_samples_per_second": 7.532, "eval_steps_per_second": 0.949, "eval_wer": 0.10509967809340938, "step": 5695 }, { "epoch": 7.08, "learning_rate": 3.7093690248565965e-05, "loss": 0.0582, "step": 6000 }, { "epoch": 7.12, "eval_cer": 0.03320422222879276, "eval_loss": 0.14533209800720215, "eval_runtime": 66.3474, "eval_samples_per_second": 7.536, "eval_steps_per_second": 0.95, "eval_wer": 0.10493025357203366, "step": 6030 }, { "epoch": 7.51, "eval_cer": 0.03320422222879276, "eval_loss": 0.14391696453094482, "eval_runtime": 66.4699, "eval_samples_per_second": 7.522, "eval_steps_per_second": 0.948, "eval_wer": 0.10515615293386796, "step": 6365 }, { "epoch": 7.56, "learning_rate": 1.8021032504780113e-05, "loss": 0.0493, "step": 6400 }, { "epoch": 7.91, "eval_cer": 0.03264244108689867, "eval_loss": 0.14469173550605774, "eval_runtime": 65.8617, "eval_samples_per_second": 7.592, "eval_steps_per_second": 0.957, "eval_wer": 0.10267125995369063, "step": 6700 } ], "max_steps": 6776, "num_train_epochs": 8, "total_flos": 2.1224029808181802e+20, "trial_name": null, "trial_params": null }