{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.909466234149218, "global_step": 6700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "eval_cer": 0.06875412712023102, "eval_loss": 0.2844763696193695, "eval_runtime": 65.9715, "eval_samples_per_second": 7.579, "eval_steps_per_second": 0.955, "eval_wer": 0.25628282600101654, "step": 335 }, { "epoch": 0.47, "learning_rate": 0.0002394, "loss": 3.0984, "step": 400 }, { "epoch": 0.79, "eval_cer": 0.04085233040615791, "eval_loss": 0.15484967827796936, "eval_runtime": 65.7396, "eval_samples_per_second": 7.606, "eval_steps_per_second": 0.958, "eval_wer": 0.1411306263059807, "step": 670 }, { "epoch": 0.94, "learning_rate": 0.0002857074569789675, "loss": 0.2022, "step": 800 }, { "epoch": 1.19, "eval_cer": 0.03730423898366892, "eval_loss": 0.14234833419322968, "eval_runtime": 65.8678, "eval_samples_per_second": 7.591, "eval_steps_per_second": 0.956, "eval_wer": 0.11916191336759474, "step": 1005 }, { "epoch": 1.42, "learning_rate": 0.0002665869980879541, "loss": 0.1458, "step": 1200 }, { "epoch": 1.58, "eval_cer": 0.03469244946433676, "eval_loss": 0.12395954132080078, "eval_runtime": 66.8459, "eval_samples_per_second": 7.48, "eval_steps_per_second": 0.942, "eval_wer": 0.11165075958660417, "step": 1340 }, { "epoch": 1.89, "learning_rate": 0.0002474665391969407, "loss": 0.1354, "step": 1600 }, { "epoch": 1.98, "eval_cer": 0.03423908222701871, "eval_loss": 0.12080004811286926, "eval_runtime": 65.8681, "eval_samples_per_second": 7.591, "eval_steps_per_second": 0.956, "eval_wer": 0.10814931947817247, "step": 1675 }, { "epoch": 2.36, "learning_rate": 0.00022834608030592734, "loss": 0.1169, "step": 2000 }, { "epoch": 2.37, "eval_cer": 0.03338162679991721, "eval_loss": 0.11872279644012451, "eval_runtime": 66.8663, "eval_samples_per_second": 7.478, "eval_steps_per_second": 0.942, "eval_wer": 0.10735867171175241, "step": 2010 }, { "epoch": 2.77, "eval_cer": 0.03327321289534116, "eval_loss": 0.12243428081274033, "eval_runtime": 67.2919, "eval_samples_per_second": 7.43, "eval_steps_per_second": 0.936, "eval_wer": 0.10199356186818773, "step": 2345 }, { "epoch": 2.83, "learning_rate": 0.00020922562141491394, "loss": 0.1107, "step": 2400 }, { "epoch": 3.16, "eval_cer": 0.0313808974700137, "eval_loss": 0.11623063683509827, "eval_runtime": 66.9398, "eval_samples_per_second": 7.469, "eval_steps_per_second": 0.941, "eval_wer": 0.09724967526966737, "step": 2680 }, { "epoch": 3.31, "learning_rate": 0.00019010516252390057, "loss": 0.0976, "step": 2800 }, { "epoch": 3.56, "eval_cer": 0.03366744527561771, "eval_loss": 0.11647358536720276, "eval_runtime": 67.2006, "eval_samples_per_second": 7.44, "eval_steps_per_second": 0.937, "eval_wer": 0.10052521601626475, "step": 3015 }, { "epoch": 3.78, "learning_rate": 0.0001709847036328872, "loss": 0.0914, "step": 3200 }, { "epoch": 3.95, "eval_cer": 0.03200181346894927, "eval_loss": 0.1139749139547348, "eval_runtime": 67.5106, "eval_samples_per_second": 7.406, "eval_steps_per_second": 0.933, "eval_wer": 0.09871802112159034, "step": 3350 }, { "epoch": 4.25, "learning_rate": 0.00015186424474187378, "loss": 0.0852, "step": 3600 }, { "epoch": 4.35, "eval_cer": 0.03156815785064506, "eval_loss": 0.12490493804216385, "eval_runtime": 67.225, "eval_samples_per_second": 7.438, "eval_steps_per_second": 0.937, "eval_wer": 0.09561190489636867, "step": 3685 }, { "epoch": 4.72, "learning_rate": 0.0001327437858508604, "loss": 0.0777, "step": 4000 }, { "epoch": 4.75, "eval_cer": 0.03226792032563595, "eval_loss": 0.11327209323644638, "eval_runtime": 67.468, "eval_samples_per_second": 7.411, "eval_steps_per_second": 0.934, "eval_wer": 0.09532953069407579, "step": 4020 }, { "epoch": 5.14, "eval_cer": 0.03157801366015198, "eval_loss": 0.1231861561536789, "eval_runtime": 67.6103, "eval_samples_per_second": 7.395, "eval_steps_per_second": 0.932, "eval_wer": 0.09521658101315864, "step": 4355 }, { "epoch": 5.19, "learning_rate": 0.00011362332695984703, "loss": 0.0714, "step": 4400 }, { "epoch": 5.54, "eval_cer": 0.032208785468594466, "eval_loss": 0.13348866999149323, "eval_runtime": 67.4714, "eval_samples_per_second": 7.411, "eval_steps_per_second": 0.934, "eval_wer": 0.09555543005591009, "step": 4690 }, { "epoch": 5.67, "learning_rate": 9.450286806883364e-05, "loss": 0.0631, "step": 4800 }, { "epoch": 5.93, "eval_cer": 0.03205109251648384, "eval_loss": 0.12228710949420929, "eval_runtime": 67.5997, "eval_samples_per_second": 7.396, "eval_steps_per_second": 0.932, "eval_wer": 0.09521658101315864, "step": 5025 }, { "epoch": 6.14, "learning_rate": 7.543021032504778e-05, "loss": 0.0621, "step": 5200 }, { "epoch": 6.33, "eval_cer": 0.032080659945004586, "eval_loss": 0.1337049901485443, "eval_runtime": 67.7409, "eval_samples_per_second": 7.381, "eval_steps_per_second": 0.93, "eval_wer": 0.09504715649178291, "step": 5360 }, { "epoch": 6.61, "learning_rate": 5.6309751434034414e-05, "loss": 0.0538, "step": 5600 }, { "epoch": 6.72, "eval_cer": 0.03255373880133645, "eval_loss": 0.13896578550338745, "eval_runtime": 67.8406, "eval_samples_per_second": 7.37, "eval_steps_per_second": 0.929, "eval_wer": 0.09431298356582142, "step": 5695 }, { "epoch": 7.08, "learning_rate": 3.718929254302103e-05, "loss": 0.0513, "step": 6000 }, { "epoch": 7.12, "eval_cer": 0.03282970146753004, "eval_loss": 0.1389976292848587, "eval_runtime": 68.1201, "eval_samples_per_second": 7.34, "eval_steps_per_second": 0.925, "eval_wer": 0.09549895521545151, "step": 6030 }, { "epoch": 7.51, "eval_cer": 0.03255373880133645, "eval_loss": 0.1410442739725113, "eval_runtime": 67.6146, "eval_samples_per_second": 7.395, "eval_steps_per_second": 0.932, "eval_wer": 0.09470830744903146, "step": 6365 }, { "epoch": 7.56, "learning_rate": 1.8068833652007646e-05, "loss": 0.0442, "step": 6400 }, { "epoch": 7.91, "eval_cer": 0.03214965061155298, "eval_loss": 0.1414366066455841, "eval_runtime": 68.0581, "eval_samples_per_second": 7.347, "eval_steps_per_second": 0.926, "eval_wer": 0.09453888292765573, "step": 6700 } ], "max_steps": 6776, "num_train_epochs": 8, "total_flos": 2.1224029808181802e+20, "trial_name": null, "trial_params": null }