{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.981132075471699, "global_step": 390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "learning_rate": 1e-08, "loss": 8.1684, "step": 10 }, { "epoch": 0.25, "eval_loss": 8.85881519317627, "eval_runtime": 219.4921, "eval_samples_per_second": 19.481, "eval_steps_per_second": 9.741, "eval_wer": 1.0125384665750017, "step": 10 }, { "epoch": 0.5, "learning_rate": 9.736842105263159e-09, "loss": 8.1428, "step": 20 }, { "epoch": 0.5, "eval_loss": 8.856884956359863, "eval_runtime": 224.2813, "eval_samples_per_second": 19.065, "eval_steps_per_second": 9.533, "eval_wer": 1.012505729064362, "step": 20 }, { "epoch": 0.75, "learning_rate": 9.473684210526316e-09, "loss": 8.1333, "step": 30 }, { "epoch": 0.75, "eval_loss": 8.855155944824219, "eval_runtime": 226.0385, "eval_samples_per_second": 18.917, "eval_steps_per_second": 9.459, "eval_wer": 1.0124075165324429, "step": 30 }, { "epoch": 1.03, "learning_rate": 9.210526315789473e-09, "loss": 8.7873, "step": 40 }, { "epoch": 1.03, "eval_loss": 8.85318660736084, "eval_runtime": 220.4335, "eval_samples_per_second": 19.398, "eval_steps_per_second": 9.699, "eval_wer": 1.0124075165324429, "step": 40 }, { "epoch": 1.28, "learning_rate": 8.947368421052632e-09, "loss": 8.1298, "step": 50 }, { "epoch": 1.28, "eval_loss": 8.851649284362793, "eval_runtime": 224.7965, "eval_samples_per_second": 19.022, "eval_steps_per_second": 9.511, "eval_wer": 1.0124075165324429, "step": 50 }, { "epoch": 1.53, "learning_rate": 8.68421052631579e-09, "loss": 8.1445, "step": 60 }, { "epoch": 1.53, "eval_loss": 8.84989070892334, "eval_runtime": 220.475, "eval_samples_per_second": 19.394, "eval_steps_per_second": 9.697, "eval_wer": 1.0123420415111635, "step": 60 }, { "epoch": 1.78, "learning_rate": 8.421052631578947e-09, "loss": 8.1635, "step": 70 }, { "epoch": 1.78, "eval_loss": 8.8483304977417, "eval_runtime": 222.7151, "eval_samples_per_second": 19.199, "eval_steps_per_second": 9.6, "eval_wer": 1.0124075165324429, "step": 70 }, { "epoch": 2.05, "learning_rate": 8.157894736842106e-09, "loss": 8.7587, "step": 80 }, { "epoch": 2.05, "eval_loss": 8.846796989440918, "eval_runtime": 221.1122, "eval_samples_per_second": 19.339, "eval_steps_per_second": 9.669, "eval_wer": 1.0125384665750017, "step": 80 }, { "epoch": 2.3, "learning_rate": 7.894736842105263e-09, "loss": 8.1424, "step": 90 }, { "epoch": 2.3, "eval_loss": 8.845438957214355, "eval_runtime": 219.7737, "eval_samples_per_second": 19.456, "eval_steps_per_second": 9.728, "eval_wer": 1.0124075165324429, "step": 90 }, { "epoch": 2.55, "learning_rate": 7.631578947368422e-09, "loss": 8.1318, "step": 100 }, { "epoch": 2.55, "eval_loss": 8.844048500061035, "eval_runtime": 222.4326, "eval_samples_per_second": 19.224, "eval_steps_per_second": 9.612, "eval_wer": 1.0124402540430826, "step": 100 }, { "epoch": 2.81, "learning_rate": 7.368421052631579e-09, "loss": 8.1469, "step": 110 }, { "epoch": 2.81, "eval_loss": 8.842790603637695, "eval_runtime": 224.1044, "eval_samples_per_second": 19.08, "eval_steps_per_second": 9.54, "eval_wer": 1.012505729064362, "step": 110 }, { "epoch": 3.08, "learning_rate": 7.105263157894737e-09, "loss": 8.7602, "step": 120 }, { "epoch": 3.08, "eval_loss": 8.841601371765137, "eval_runtime": 222.7003, "eval_samples_per_second": 19.201, "eval_steps_per_second": 9.6, "eval_wer": 1.0124729915537223, "step": 120 }, { "epoch": 3.33, "learning_rate": 6.842105263157895e-09, "loss": 8.1584, "step": 130 }, { "epoch": 3.33, "eval_loss": 8.840473175048828, "eval_runtime": 220.9442, "eval_samples_per_second": 19.353, "eval_steps_per_second": 9.677, "eval_wer": 1.0125712040856414, "step": 130 }, { "epoch": 3.58, "learning_rate": 6.578947368421054e-09, "loss": 8.142, "step": 140 }, { "epoch": 3.58, "eval_loss": 8.839417457580566, "eval_runtime": 223.4762, "eval_samples_per_second": 19.134, "eval_steps_per_second": 9.567, "eval_wer": 1.0125712040856414, "step": 140 }, { "epoch": 3.83, "learning_rate": 6.31578947368421e-09, "loss": 8.1285, "step": 150 }, { "epoch": 3.83, "eval_loss": 8.838351249694824, "eval_runtime": 225.8637, "eval_samples_per_second": 18.932, "eval_steps_per_second": 9.466, "eval_wer": 1.0124075165324429, "step": 150 }, { "epoch": 4.1, "learning_rate": 6.052631578947369e-09, "loss": 8.7756, "step": 160 }, { "epoch": 4.1, "eval_loss": 8.837142944335938, "eval_runtime": 224.8527, "eval_samples_per_second": 19.017, "eval_steps_per_second": 9.508, "eval_wer": 1.0124075165324429, "step": 160 }, { "epoch": 4.35, "learning_rate": 5.789473684210527e-09, "loss": 8.0991, "step": 170 }, { "epoch": 4.35, "eval_loss": 8.83634090423584, "eval_runtime": 220.3123, "eval_samples_per_second": 19.409, "eval_steps_per_second": 9.704, "eval_wer": 1.0124729915537223, "step": 170 }, { "epoch": 4.6, "learning_rate": 5.526315789473685e-09, "loss": 8.1442, "step": 180 }, { "epoch": 4.6, "eval_loss": 8.83536434173584, "eval_runtime": 224.5432, "eval_samples_per_second": 19.043, "eval_steps_per_second": 9.522, "eval_wer": 1.0124402540430826, "step": 180 }, { "epoch": 4.86, "learning_rate": 5.263157894736842e-09, "loss": 8.1294, "step": 190 }, { "epoch": 4.86, "eval_loss": 8.834578514099121, "eval_runtime": 220.0402, "eval_samples_per_second": 19.433, "eval_steps_per_second": 9.716, "eval_wer": 1.0124075165324429, "step": 190 }, { "epoch": 5.13, "learning_rate": 5e-09, "loss": 8.7276, "step": 200 }, { "epoch": 5.13, "eval_loss": 8.833772659301758, "eval_runtime": 224.0823, "eval_samples_per_second": 19.082, "eval_steps_per_second": 9.541, "eval_wer": 1.0125384665750017, "step": 200 }, { "epoch": 5.38, "learning_rate": 4.736842105263158e-09, "loss": 8.1439, "step": 210 }, { "epoch": 5.38, "eval_loss": 8.832892417907715, "eval_runtime": 220.6908, "eval_samples_per_second": 19.376, "eval_steps_per_second": 9.688, "eval_wer": 1.0124402540430826, "step": 210 }, { "epoch": 5.63, "learning_rate": 4.473684210526316e-09, "loss": 8.1115, "step": 220 }, { "epoch": 5.63, "eval_loss": 8.832157135009766, "eval_runtime": 221.8649, "eval_samples_per_second": 19.273, "eval_steps_per_second": 9.636, "eval_wer": 1.0124402540430826, "step": 220 }, { "epoch": 5.88, "learning_rate": 4.210526315789473e-09, "loss": 8.1501, "step": 230 }, { "epoch": 5.88, "eval_loss": 8.831602096557617, "eval_runtime": 223.55, "eval_samples_per_second": 19.128, "eval_steps_per_second": 9.564, "eval_wer": 1.0125384665750017, "step": 230 }, { "epoch": 6.15, "learning_rate": 3.947368421052631e-09, "loss": 8.7143, "step": 240 }, { "epoch": 6.15, "eval_loss": 8.830825805664062, "eval_runtime": 224.3279, "eval_samples_per_second": 19.061, "eval_steps_per_second": 9.531, "eval_wer": 1.0124075165324429, "step": 240 }, { "epoch": 6.4, "learning_rate": 3.6842105263157894e-09, "loss": 8.143, "step": 250 }, { "epoch": 6.4, "eval_loss": 8.830228805541992, "eval_runtime": 225.4738, "eval_samples_per_second": 18.965, "eval_steps_per_second": 9.482, "eval_wer": 1.0123747790218032, "step": 250 }, { "epoch": 6.65, "learning_rate": 3.4210526315789474e-09, "loss": 8.1528, "step": 260 }, { "epoch": 6.65, "eval_loss": 8.829960823059082, "eval_runtime": 222.8802, "eval_samples_per_second": 19.185, "eval_steps_per_second": 9.593, "eval_wer": 1.0124729915537223, "step": 260 }, { "epoch": 6.91, "learning_rate": 3.1842105263157894e-09, "loss": 8.1293, "step": 270 }, { "epoch": 6.91, "eval_loss": 8.829716682434082, "eval_runtime": 223.7307, "eval_samples_per_second": 19.112, "eval_steps_per_second": 9.556, "eval_wer": 1.0124075165324429, "step": 270 }, { "epoch": 7.18, "learning_rate": 2.9210526315789475e-09, "loss": 8.7519, "step": 280 }, { "epoch": 7.18, "eval_loss": 8.829301834106445, "eval_runtime": 223.0404, "eval_samples_per_second": 19.171, "eval_steps_per_second": 9.586, "eval_wer": 1.0124729915537223, "step": 280 }, { "epoch": 7.43, "learning_rate": 2.657894736842105e-09, "loss": 8.1153, "step": 290 }, { "epoch": 7.43, "eval_loss": 8.828947067260742, "eval_runtime": 219.8129, "eval_samples_per_second": 19.453, "eval_steps_per_second": 9.726, "eval_wer": 1.0124075165324429, "step": 290 }, { "epoch": 7.68, "learning_rate": 2.394736842105263e-09, "loss": 8.1292, "step": 300 }, { "epoch": 7.68, "eval_loss": 8.828753471374512, "eval_runtime": 222.9513, "eval_samples_per_second": 19.179, "eval_steps_per_second": 9.59, "eval_wer": 1.0124402540430826, "step": 300 }, { "epoch": 7.93, "learning_rate": 2.131578947368421e-09, "loss": 8.0904, "step": 310 }, { "epoch": 7.93, "eval_loss": 8.828449249267578, "eval_runtime": 224.0134, "eval_samples_per_second": 19.088, "eval_steps_per_second": 9.544, "eval_wer": 1.0124075165324429, "step": 310 }, { "epoch": 8.2, "learning_rate": 1.868421052631579e-09, "loss": 8.7425, "step": 320 }, { "epoch": 8.2, "eval_loss": 8.828290939331055, "eval_runtime": 219.9475, "eval_samples_per_second": 19.441, "eval_steps_per_second": 9.721, "eval_wer": 1.0125384665750017, "step": 320 }, { "epoch": 8.45, "learning_rate": 1.605263157894737e-09, "loss": 8.0963, "step": 330 }, { "epoch": 8.45, "eval_loss": 8.828081130981445, "eval_runtime": 222.5212, "eval_samples_per_second": 19.216, "eval_steps_per_second": 9.608, "eval_wer": 1.0124075165324429, "step": 330 }, { "epoch": 8.7, "learning_rate": 1.3421052631578948e-09, "loss": 8.1112, "step": 340 }, { "epoch": 8.7, "eval_loss": 8.828051567077637, "eval_runtime": 222.696, "eval_samples_per_second": 19.201, "eval_steps_per_second": 9.601, "eval_wer": 1.0124402540430826, "step": 340 }, { "epoch": 8.96, "learning_rate": 1.0789473684210528e-09, "loss": 8.124, "step": 350 }, { "epoch": 8.96, "eval_loss": 8.828123092651367, "eval_runtime": 222.2122, "eval_samples_per_second": 19.243, "eval_steps_per_second": 9.621, "eval_wer": 1.012505729064362, "step": 350 }, { "epoch": 9.23, "learning_rate": 8.157894736842106e-10, "loss": 8.7327, "step": 360 }, { "epoch": 9.23, "eval_loss": 8.827865600585938, "eval_runtime": 222.5189, "eval_samples_per_second": 19.216, "eval_steps_per_second": 9.608, "eval_wer": 1.0123420415111635, "step": 360 }, { "epoch": 9.48, "learning_rate": 5.526315789473684e-10, "loss": 8.1261, "step": 370 }, { "epoch": 9.48, "eval_loss": 8.827857971191406, "eval_runtime": 224.6034, "eval_samples_per_second": 19.038, "eval_steps_per_second": 9.519, "eval_wer": 1.012603941596281, "step": 370 }, { "epoch": 9.73, "learning_rate": 2.894736842105263e-10, "loss": 8.1259, "step": 380 }, { "epoch": 9.73, "eval_loss": 8.827925682067871, "eval_runtime": 223.0189, "eval_samples_per_second": 19.173, "eval_steps_per_second": 9.587, "eval_wer": 1.0123747790218032, "step": 380 }, { "epoch": 9.98, "learning_rate": 2.631578947368421e-11, "loss": 8.1116, "step": 390 }, { "epoch": 9.98, "eval_loss": 8.827937126159668, "eval_runtime": 224.7494, "eval_samples_per_second": 19.026, "eval_steps_per_second": 9.513, "eval_wer": 1.0123093040005238, "step": 390 }, { "epoch": 9.98, "step": 390, "total_flos": 1.7181016563618468e+19, "train_loss": 8.274780469063002, "train_runtime": 14935.5952, "train_samples_per_second": 6.813, "train_steps_per_second": 0.026 } ], "max_steps": 390, "num_train_epochs": 10, "total_flos": 1.7181016563618468e+19, "trial_name": null, "trial_params": null }