{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "global_step": 16950, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.71, "eval_cer": 0.479669124113056, "eval_loss": 1.7289525270462036, "eval_runtime": 352.9433, "eval_samples_per_second": 28.738, "eval_steps_per_second": 3.593, "eval_wer": 0.9804149925270289, "step": 400 }, { "epoch": 0.88, "learning_rate": 0.0002982, "loss": 4.5435, "step": 500 }, { "epoch": 1.42, "eval_cer": 0.1449701156451013, "eval_loss": 0.48100030422210693, "eval_runtime": 355.0295, "eval_samples_per_second": 28.569, "eval_steps_per_second": 3.572, "eval_wer": 0.5774374502610687, "step": 800 }, { "epoch": 1.77, "learning_rate": 0.0002909361702127659, "loss": 0.523, "step": 1000 }, { "epoch": 2.12, "eval_cer": 0.11564221810245158, "eval_loss": 0.3859069347381592, "eval_runtime": 358.2561, "eval_samples_per_second": 28.312, "eval_steps_per_second": 3.539, "eval_wer": 0.48120111026999746, "step": 1200 }, { "epoch": 2.65, "learning_rate": 0.0002818176291793313, "loss": 0.3449, "step": 1500 }, { "epoch": 2.83, "eval_cer": 0.1094981215960235, "eval_loss": 0.34923675656318665, "eval_runtime": 353.6657, "eval_samples_per_second": 28.68, "eval_steps_per_second": 3.585, "eval_wer": 0.44977581086589413, "step": 1600 }, { "epoch": 3.54, "learning_rate": 0.0002726990881458966, "loss": 0.2814, "step": 2000 }, { "epoch": 3.54, "eval_cer": 0.10994501740010437, "eval_loss": 0.36604171991348267, "eval_runtime": 352.2579, "eval_samples_per_second": 28.794, "eval_steps_per_second": 3.6, "eval_wer": 0.44657310895009605, "step": 2000 }, { "epoch": 4.25, "eval_cer": 0.10429683107626926, "eval_loss": 0.37658488750457764, "eval_runtime": 352.034, "eval_samples_per_second": 28.813, "eval_steps_per_second": 3.602, "eval_wer": 0.4235330654709913, "step": 2400 }, { "epoch": 4.42, "learning_rate": 0.000263580547112462, "loss": 0.2463, "step": 2500 }, { "epoch": 4.96, "eval_cer": 0.10097250293365471, "eval_loss": 0.34164857864379883, "eval_runtime": 355.8322, "eval_samples_per_second": 28.505, "eval_steps_per_second": 3.563, "eval_wer": 0.4119256973155535, "step": 2800 }, { "epoch": 5.31, "learning_rate": 0.0002544620060790273, "loss": 0.2296, "step": 3000 }, { "epoch": 5.66, "eval_cer": 0.09793072826716873, "eval_loss": 0.3322136402130127, "eval_runtime": 355.102, "eval_samples_per_second": 28.564, "eval_steps_per_second": 3.571, "eval_wer": 0.4012500242628933, "step": 3200 }, { "epoch": 6.19, "learning_rate": 0.00024534346504559266, "loss": 0.2143, "step": 3500 }, { "epoch": 6.37, "eval_cer": 0.09717821339707125, "eval_loss": 0.3369796872138977, "eval_runtime": 355.7454, "eval_samples_per_second": 28.512, "eval_steps_per_second": 3.564, "eval_wer": 0.395601622702304, "step": 3600 }, { "epoch": 7.08, "learning_rate": 0.00023622492401215801, "loss": 0.1955, "step": 4000 }, { "epoch": 7.08, "eval_cer": 0.09977020906074034, "eval_loss": 0.3401270806789398, "eval_runtime": 359.7076, "eval_samples_per_second": 28.198, "eval_steps_per_second": 3.525, "eval_wer": 0.40330751761486056, "step": 4000 }, { "epoch": 7.79, "eval_cer": 0.09622099141671736, "eval_loss": 0.33754295110702515, "eval_runtime": 352.1651, "eval_samples_per_second": 28.802, "eval_steps_per_second": 3.601, "eval_wer": 0.3889244744657311, "step": 4400 }, { "epoch": 7.96, "learning_rate": 0.0002271063829787234, "loss": 0.1845, "step": 4500 }, { "epoch": 8.5, "eval_cer": 0.09233155632184571, "eval_loss": 0.34551626443862915, "eval_runtime": 357.5651, "eval_samples_per_second": 28.367, "eval_steps_per_second": 3.546, "eval_wer": 0.37524020264368485, "step": 4800 }, { "epoch": 8.85, "learning_rate": 0.00021798784194528871, "loss": 0.1752, "step": 5000 }, { "epoch": 9.2, "eval_cer": 0.09245553386749396, "eval_loss": 0.3335849642753601, "eval_runtime": 351.3613, "eval_samples_per_second": 28.868, "eval_steps_per_second": 3.609, "eval_wer": 0.37176575632291, "step": 5200 }, { "epoch": 9.73, "learning_rate": 0.0002088693009118541, "loss": 0.1705, "step": 5500 }, { "epoch": 9.91, "eval_cer": 0.08918598650086351, "eval_loss": 0.3145359754562378, "eval_runtime": 355.7051, "eval_samples_per_second": 28.515, "eval_steps_per_second": 3.565, "eval_wer": 0.3653021215473903, "step": 5600 }, { "epoch": 10.62, "learning_rate": 0.00019975075987841941, "loss": 0.1585, "step": 6000 }, { "epoch": 10.62, "eval_cer": 0.09218451318630942, "eval_loss": 0.34097233414649963, "eval_runtime": 352.3797, "eval_samples_per_second": 28.784, "eval_steps_per_second": 3.598, "eval_wer": 0.37370678778703004, "step": 6000 }, { "epoch": 11.33, "eval_cer": 0.08989237019118491, "eval_loss": 0.3296053409576416, "eval_runtime": 350.4695, "eval_samples_per_second": 28.941, "eval_steps_per_second": 3.618, "eval_wer": 0.3664279197965799, "step": 6400 }, { "epoch": 11.5, "learning_rate": 0.0001906322188449848, "loss": 0.1474, "step": 6500 }, { "epoch": 12.04, "eval_cer": 0.08988083739624089, "eval_loss": 0.34918734431266785, "eval_runtime": 352.3033, "eval_samples_per_second": 28.791, "eval_steps_per_second": 3.599, "eval_wer": 0.3589937692890002, "step": 6800 }, { "epoch": 12.39, "learning_rate": 0.00018153191489361702, "loss": 0.1485, "step": 7000 }, { "epoch": 12.74, "eval_cer": 0.08672950117778669, "eval_loss": 0.31763964891433716, "eval_runtime": 352.8906, "eval_samples_per_second": 28.743, "eval_steps_per_second": 3.593, "eval_wer": 0.3506085133640016, "step": 7200 }, { "epoch": 13.27, "learning_rate": 0.00017241337386018235, "loss": 0.137, "step": 7500 }, { "epoch": 13.45, "eval_cer": 0.08901587777543918, "eval_loss": 0.3532153367996216, "eval_runtime": 357.5465, "eval_samples_per_second": 28.368, "eval_steps_per_second": 3.546, "eval_wer": 0.360041926279625, "step": 7600 }, { "epoch": 14.16, "learning_rate": 0.00016329483282674772, "loss": 0.1291, "step": 8000 }, { "epoch": 14.16, "eval_cer": 0.08733497291234787, "eval_loss": 0.33181944489479065, "eval_runtime": 352.5023, "eval_samples_per_second": 28.774, "eval_steps_per_second": 3.597, "eval_wer": 0.3570527378248801, "step": 8000 }, { "epoch": 14.87, "eval_cer": 0.08829796129017377, "eval_loss": 0.33532437682151794, "eval_runtime": 357.1338, "eval_samples_per_second": 28.401, "eval_steps_per_second": 3.55, "eval_wer": 0.3547623206972185, "step": 8400 }, { "epoch": 15.04, "learning_rate": 0.00015417629179331305, "loss": 0.1274, "step": 8500 }, { "epoch": 15.58, "eval_cer": 0.08226342633571389, "eval_loss": 0.32346823811531067, "eval_runtime": 349.5389, "eval_samples_per_second": 29.018, "eval_steps_per_second": 3.628, "eval_wer": 0.339602864962441, "step": 8800 }, { "epoch": 15.93, "learning_rate": 0.00014505775075987842, "loss": 0.1198, "step": 9000 }, { "epoch": 16.28, "eval_cer": 0.08322353151480379, "eval_loss": 0.32590439915657043, "eval_runtime": 352.9664, "eval_samples_per_second": 28.736, "eval_steps_per_second": 3.592, "eval_wer": 0.33894291426464024, "step": 9200 }, { "epoch": 16.81, "learning_rate": 0.00013595744680851063, "loss": 0.1164, "step": 9500 }, { "epoch": 16.99, "eval_cer": 0.084353745419318, "eval_loss": 0.32632604241371155, "eval_runtime": 355.0498, "eval_samples_per_second": 28.568, "eval_steps_per_second": 3.571, "eval_wer": 0.3411362798190959, "step": 9600 }, { "epoch": 17.7, "learning_rate": 0.00012683890577507598, "loss": 0.1119, "step": 10000 }, { "epoch": 17.7, "eval_cer": 0.08243353506113824, "eval_loss": 0.32535773515701294, "eval_runtime": 352.3077, "eval_samples_per_second": 28.79, "eval_steps_per_second": 3.599, "eval_wer": 0.3377006541276034, "step": 10000 }, { "epoch": 18.41, "eval_cer": 0.0811533948223517, "eval_loss": 0.3243008255958557, "eval_runtime": 356.0414, "eval_samples_per_second": 28.488, "eval_steps_per_second": 3.561, "eval_wer": 0.3330615889283565, "step": 10400 }, { "epoch": 18.58, "learning_rate": 0.00011772036474164133, "loss": 0.1054, "step": 10500 }, { "epoch": 19.12, "eval_cer": 0.07895063098804338, "eval_loss": 0.32234683632850647, "eval_runtime": 353.14, "eval_samples_per_second": 28.722, "eval_steps_per_second": 3.591, "eval_wer": 0.3239387410469924, "step": 10800 }, { "epoch": 19.47, "learning_rate": 0.00010860182370820666, "loss": 0.1017, "step": 11000 }, { "epoch": 19.82, "eval_cer": 0.07741965245922436, "eval_loss": 0.305361270904541, "eval_runtime": 348.3343, "eval_samples_per_second": 29.119, "eval_steps_per_second": 3.64, "eval_wer": 0.3189502901842039, "step": 11200 }, { "epoch": 20.35, "learning_rate": 9.948328267477204e-05, "loss": 0.0964, "step": 11500 }, { "epoch": 20.53, "eval_cer": 0.07850373518396249, "eval_loss": 0.32777705788612366, "eval_runtime": 353.3356, "eval_samples_per_second": 28.706, "eval_steps_per_second": 3.589, "eval_wer": 0.3236669966420156, "step": 11600 }, { "epoch": 21.24, "learning_rate": 9.036474164133739e-05, "loss": 0.0903, "step": 12000 }, { "epoch": 21.24, "eval_cer": 0.07744560124784841, "eval_loss": 0.3166551887989044, "eval_runtime": 353.0166, "eval_samples_per_second": 28.732, "eval_steps_per_second": 3.592, "eval_wer": 0.3177274403618083, "step": 12000 }, { "epoch": 21.95, "eval_cer": 0.07655469283842266, "eval_loss": 0.33310163021087646, "eval_runtime": 354.4672, "eval_samples_per_second": 28.615, "eval_steps_per_second": 3.577, "eval_wer": 0.3124478347794018, "step": 12400 }, { "epoch": 22.12, "learning_rate": 8.124620060790274e-05, "loss": 0.0886, "step": 12500 }, { "epoch": 22.65, "eval_cer": 0.07452492092827466, "eval_loss": 0.3098578155040741, "eval_runtime": 354.4398, "eval_samples_per_second": 28.617, "eval_steps_per_second": 3.577, "eval_wer": 0.30889574720006213, "step": 12800 }, { "epoch": 23.01, "learning_rate": 7.214589665653494e-05, "loss": 0.0836, "step": 13000 }, { "epoch": 23.36, "eval_cer": 0.07314963513119996, "eval_loss": 0.3170570433139801, "eval_runtime": 351.2874, "eval_samples_per_second": 28.874, "eval_steps_per_second": 3.61, "eval_wer": 0.3047613501814864, "step": 13200 }, { "epoch": 23.89, "learning_rate": 6.30273556231003e-05, "loss": 0.0796, "step": 13500 }, { "epoch": 24.07, "eval_cer": 0.07325919668316817, "eval_loss": 0.315768837928772, "eval_runtime": 354.0965, "eval_samples_per_second": 28.645, "eval_steps_per_second": 3.581, "eval_wer": 0.30410139948368564, "step": 13600 }, { "epoch": 24.78, "learning_rate": 5.390881458966565e-05, "loss": 0.0739, "step": 14000 }, { "epoch": 24.78, "eval_cer": 0.07206266920772582, "eval_loss": 0.3202644884586334, "eval_runtime": 351.0812, "eval_samples_per_second": 28.891, "eval_steps_per_second": 3.612, "eval_wer": 0.3002775674993692, "step": 14000 }, { "epoch": 25.49, "eval_cer": 0.07125249036290822, "eval_loss": 0.3138331472873688, "eval_runtime": 353.8044, "eval_samples_per_second": 28.668, "eval_steps_per_second": 3.584, "eval_wer": 0.2973660203031891, "step": 14400 }, { "epoch": 25.66, "learning_rate": 4.4790273556231e-05, "loss": 0.0742, "step": 14500 }, { "epoch": 26.19, "eval_cer": 0.07109103123369191, "eval_loss": 0.3196839392185211, "eval_runtime": 353.996, "eval_samples_per_second": 28.653, "eval_steps_per_second": 3.582, "eval_wer": 0.2958520157611755, "step": 14800 }, { "epoch": 26.55, "learning_rate": 3.568996960486322e-05, "loss": 0.07, "step": 15000 }, { "epoch": 26.9, "eval_cer": 0.07031256757497037, "eval_loss": 0.3232352137565613, "eval_runtime": 355.2159, "eval_samples_per_second": 28.554, "eval_steps_per_second": 3.57, "eval_wer": 0.2951726547487335, "step": 15200 }, { "epoch": 27.43, "learning_rate": 2.6571428571428566e-05, "loss": 0.0654, "step": 15500 }, { "epoch": 27.61, "eval_cer": 0.07008479487482593, "eval_loss": 0.3242589831352234, "eval_runtime": 352.5643, "eval_samples_per_second": 28.769, "eval_steps_per_second": 3.597, "eval_wer": 0.2938527533531319, "step": 15600 }, { "epoch": 28.32, "learning_rate": 1.745288753799392e-05, "loss": 0.0631, "step": 16000 }, { "epoch": 28.32, "eval_cer": 0.06881907062971944, "eval_loss": 0.3212898373603821, "eval_runtime": 352.1058, "eval_samples_per_second": 28.807, "eval_steps_per_second": 3.601, "eval_wer": 0.2875638114093829, "step": 16000 }, { "epoch": 29.03, "eval_cer": 0.06853940035232689, "eval_loss": 0.3151107728481293, "eval_runtime": 353.743, "eval_samples_per_second": 28.673, "eval_steps_per_second": 3.585, "eval_wer": 0.28799083833148936, "step": 16400 }, { "epoch": 29.2, "learning_rate": 8.33434650455927e-06, "loss": 0.0607, "step": 16500 }, { "epoch": 29.73, "eval_cer": 0.06810115414445403, "eval_loss": 0.31835824251174927, "eval_runtime": 352.8555, "eval_samples_per_second": 28.745, "eval_steps_per_second": 3.594, "eval_wer": 0.28665152662124654, "step": 16800 }, { "epoch": 30.0, "step": 16950, "total_flos": 1.1955997003691401e+20, "train_loss": 0.2775966154790558, "train_runtime": 58895.0057, "train_samples_per_second": 18.401, "train_steps_per_second": 0.288 } ], "max_steps": 16950, "num_train_epochs": 30, "total_flos": 1.1955997003691401e+20, "trial_name": null, "trial_params": null }