{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.079754601226995, "global_step": 16000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.23, "learning_rate": 0.0002376, "loss": 6.548, "step": 400 }, { "epoch": 1.23, "eval_cer": 1.0, "eval_loss": 3.476283073425293, "eval_runtime": 60.3486, "eval_samples_per_second": 26.513, "eval_steps_per_second": 1.657, "eval_wer": 1.0, "step": 400 }, { "epoch": 2.45, "learning_rate": 0.000294379746835443, "loss": 3.42, "step": 800 }, { "epoch": 2.45, "eval_cer": 1.0, "eval_loss": 3.3155558109283447, "eval_runtime": 60.3859, "eval_samples_per_second": 26.496, "eval_steps_per_second": 1.656, "eval_wer": 1.0, "step": 800 }, { "epoch": 3.68, "learning_rate": 0.00028678481012658224, "loss": 3.291, "step": 1200 }, { "epoch": 3.68, "eval_cer": 1.0, "eval_loss": 3.2396233081817627, "eval_runtime": 60.8505, "eval_samples_per_second": 26.294, "eval_steps_per_second": 1.643, "eval_wer": 1.0, "step": 1200 }, { "epoch": 4.91, "learning_rate": 0.0002791898734177215, "loss": 2.6515, "step": 1600 }, { "epoch": 4.91, "eval_cer": 0.5835108495504928, "eval_loss": 2.042246103286743, "eval_runtime": 60.4335, "eval_samples_per_second": 26.475, "eval_steps_per_second": 1.655, "eval_wer": 0.999671772428884, "step": 1600 }, { "epoch": 6.13, "learning_rate": 0.00027159493670886076, "loss": 1.7019, "step": 2000 }, { "epoch": 6.13, "eval_cer": 0.4797450987471567, "eval_loss": 1.6337121725082397, "eval_runtime": 60.2878, "eval_samples_per_second": 26.539, "eval_steps_per_second": 1.659, "eval_wer": 0.9892778993435448, "step": 2000 }, { "epoch": 7.36, "learning_rate": 0.00026399999999999997, "loss": 1.3604, "step": 2400 }, { "epoch": 7.36, "eval_cer": 0.44627576993898255, "eval_loss": 1.5220645666122437, "eval_runtime": 60.8256, "eval_samples_per_second": 26.305, "eval_steps_per_second": 1.644, "eval_wer": 0.987527352297593, "step": 2400 }, { "epoch": 8.59, "learning_rate": 0.0002564050632911392, "loss": 1.1965, "step": 2800 }, { "epoch": 8.59, "eval_cer": 0.4246669314366177, "eval_loss": 1.528436303138733, "eval_runtime": 60.5379, "eval_samples_per_second": 26.43, "eval_steps_per_second": 1.652, "eval_wer": 0.9765864332603938, "step": 2800 }, { "epoch": 9.82, "learning_rate": 0.00024881012658227843, "loss": 1.069, "step": 3200 }, { "epoch": 9.82, "eval_cer": 0.41239123370762176, "eval_loss": 1.5227978229522705, "eval_runtime": 60.2014, "eval_samples_per_second": 26.577, "eval_steps_per_second": 1.661, "eval_wer": 0.9671772428884027, "step": 3200 }, { "epoch": 11.04, "learning_rate": 0.00024121518987341772, "loss": 0.9536, "step": 3600 }, { "epoch": 11.04, "eval_cer": 0.38684695093331406, "eval_loss": 1.4059038162231445, "eval_runtime": 60.2947, "eval_samples_per_second": 26.536, "eval_steps_per_second": 1.659, "eval_wer": 0.9599562363238512, "step": 3600 }, { "epoch": 12.27, "learning_rate": 0.00023362025316455695, "loss": 0.8487, "step": 4000 }, { "epoch": 12.27, "eval_cer": 0.37390331082788747, "eval_loss": 1.4082870483398438, "eval_runtime": 60.1938, "eval_samples_per_second": 26.581, "eval_steps_per_second": 1.661, "eval_wer": 0.950109409190372, "step": 4000 }, { "epoch": 13.5, "learning_rate": 0.00022602531645569618, "loss": 0.7655, "step": 4400 }, { "epoch": 13.5, "eval_cer": 0.36121240567570495, "eval_loss": 1.40787935256958, "eval_runtime": 60.2423, "eval_samples_per_second": 26.559, "eval_steps_per_second": 1.66, "eval_wer": 0.9368708971553611, "step": 4400 }, { "epoch": 14.72, "learning_rate": 0.0002184303797468354, "loss": 0.6956, "step": 4800 }, { "epoch": 14.72, "eval_cer": 0.34590388850778064, "eval_loss": 1.417035698890686, "eval_runtime": 60.1538, "eval_samples_per_second": 26.598, "eval_steps_per_second": 1.662, "eval_wer": 0.9411378555798687, "step": 4800 }, { "epoch": 15.95, "learning_rate": 0.00021083544303797464, "loss": 0.6287, "step": 5200 }, { "epoch": 15.95, "eval_cer": 0.3383579449037802, "eval_loss": 1.3999766111373901, "eval_runtime": 60.2038, "eval_samples_per_second": 26.576, "eval_steps_per_second": 1.661, "eval_wer": 0.9235229759299781, "step": 5200 }, { "epoch": 17.18, "learning_rate": 0.00020324050632911393, "loss": 0.561, "step": 5600 }, { "epoch": 17.18, "eval_cer": 0.32953027403689933, "eval_loss": 1.4734982252120972, "eval_runtime": 60.3387, "eval_samples_per_second": 26.517, "eval_steps_per_second": 1.657, "eval_wer": 0.9022975929978119, "step": 5600 }, { "epoch": 18.4, "learning_rate": 0.00019564556962025316, "loss": 0.5155, "step": 6000 }, { "epoch": 18.4, "eval_cer": 0.32232732786944435, "eval_loss": 1.538634181022644, "eval_runtime": 69.1066, "eval_samples_per_second": 23.153, "eval_steps_per_second": 1.447, "eval_wer": 0.9202407002188184, "step": 6000 }, { "epoch": 19.63, "learning_rate": 0.0001880506329113924, "loss": 0.4864, "step": 6400 }, { "epoch": 19.63, "eval_cer": 0.32590172220818137, "eval_loss": 1.618619680404663, "eval_runtime": 69.0557, "eval_samples_per_second": 23.17, "eval_steps_per_second": 1.448, "eval_wer": 0.9073304157549235, "step": 6400 }, { "epoch": 20.86, "learning_rate": 0.00018045569620253163, "loss": 0.4261, "step": 6800 }, { "epoch": 20.86, "eval_cer": 0.313030292089396, "eval_loss": 1.6417021751403809, "eval_runtime": 68.875, "eval_samples_per_second": 23.23, "eval_steps_per_second": 1.452, "eval_wer": 0.9216630196936543, "step": 6800 }, { "epoch": 22.09, "learning_rate": 0.00017286075949367088, "loss": 0.4051, "step": 7200 }, { "epoch": 22.09, "eval_cer": 0.30263205401307003, "eval_loss": 1.6295086145401, "eval_runtime": 69.3362, "eval_samples_per_second": 23.076, "eval_steps_per_second": 1.442, "eval_wer": 0.8954048140043763, "step": 7200 }, { "epoch": 23.31, "learning_rate": 0.00016526582278481012, "loss": 0.3779, "step": 7600 }, { "epoch": 23.31, "eval_cer": 0.31534101166191286, "eval_loss": 1.8218317031860352, "eval_runtime": 68.8091, "eval_samples_per_second": 23.253, "eval_steps_per_second": 1.453, "eval_wer": 0.8979212253829322, "step": 7600 }, { "epoch": 24.54, "learning_rate": 0.00015767088607594935, "loss": 0.35, "step": 8000 }, { "epoch": 24.54, "eval_cer": 0.303570783839405, "eval_loss": 1.779032588005066, "eval_runtime": 68.3457, "eval_samples_per_second": 23.41, "eval_steps_per_second": 1.463, "eval_wer": 0.8921225382932166, "step": 8000 }, { "epoch": 25.77, "learning_rate": 0.00015007594936708858, "loss": 0.3343, "step": 8400 }, { "epoch": 25.77, "eval_cer": 0.3071812831714626, "eval_loss": 1.8588141202926636, "eval_runtime": 60.0463, "eval_samples_per_second": 26.646, "eval_steps_per_second": 1.665, "eval_wer": 0.9113785557986871, "step": 8400 }, { "epoch": 26.99, "learning_rate": 0.00014248101265822784, "loss": 0.3137, "step": 8800 }, { "epoch": 26.99, "eval_cer": 0.29351554319962453, "eval_loss": 1.8095606565475464, "eval_runtime": 59.7272, "eval_samples_per_second": 26.788, "eval_steps_per_second": 1.674, "eval_wer": 0.8756017505470459, "step": 8800 }, { "epoch": 28.22, "learning_rate": 0.00013488607594936707, "loss": 0.299, "step": 9200 }, { "epoch": 28.22, "eval_cer": 0.3023251615698451, "eval_loss": 1.9720637798309326, "eval_runtime": 60.067, "eval_samples_per_second": 26.637, "eval_steps_per_second": 1.665, "eval_wer": 0.8863238512035011, "step": 9200 }, { "epoch": 29.45, "learning_rate": 0.00012729113924050633, "loss": 0.2894, "step": 9600 }, { "epoch": 29.45, "eval_cer": 0.29584431526880167, "eval_loss": 1.9907439947128296, "eval_runtime": 68.8361, "eval_samples_per_second": 23.244, "eval_steps_per_second": 1.453, "eval_wer": 0.887199124726477, "step": 9600 }, { "epoch": 30.67, "learning_rate": 0.00011969620253164556, "loss": 0.2784, "step": 10000 }, { "epoch": 30.67, "eval_cer": 0.2944542730259595, "eval_loss": 1.9494301080703735, "eval_runtime": 68.9865, "eval_samples_per_second": 23.193, "eval_steps_per_second": 1.45, "eval_wer": 0.9089715536105033, "step": 10000 }, { "epoch": 31.9, "learning_rate": 0.00011210126582278481, "loss": 0.2662, "step": 10400 }, { "epoch": 31.9, "eval_cer": 0.29346138570964364, "eval_loss": 1.995172142982483, "eval_runtime": 68.9796, "eval_samples_per_second": 23.195, "eval_steps_per_second": 1.45, "eval_wer": 0.8978118161925602, "step": 10400 }, { "epoch": 33.13, "learning_rate": 0.00010450632911392404, "loss": 0.2614, "step": 10800 }, { "epoch": 33.13, "eval_cer": 0.29790229988807454, "eval_loss": 2.0600392818450928, "eval_runtime": 68.6491, "eval_samples_per_second": 23.307, "eval_steps_per_second": 1.457, "eval_wer": 0.8948577680525164, "step": 10800 }, { "epoch": 34.36, "learning_rate": 9.691139240506327e-05, "loss": 0.2401, "step": 11200 }, { "epoch": 34.36, "eval_cer": 0.29495974293244753, "eval_loss": 2.118035316467285, "eval_runtime": 68.8384, "eval_samples_per_second": 23.243, "eval_steps_per_second": 1.453, "eval_wer": 0.8913566739606127, "step": 11200 }, { "epoch": 35.58, "learning_rate": 8.933544303797467e-05, "loss": 0.2392, "step": 11600 }, { "epoch": 35.58, "eval_cer": 0.28950788894104057, "eval_loss": 2.1196768283843994, "eval_runtime": 68.573, "eval_samples_per_second": 23.333, "eval_steps_per_second": 1.458, "eval_wer": 0.8713347921225383, "step": 11600 }, { "epoch": 36.81, "learning_rate": 8.175949367088606e-05, "loss": 0.23, "step": 12000 }, { "epoch": 36.81, "eval_cer": 0.2940932230927537, "eval_loss": 2.168043851852417, "eval_runtime": 68.6136, "eval_samples_per_second": 23.319, "eval_steps_per_second": 1.457, "eval_wer": 0.8713347921225383, "step": 12000 }, { "epoch": 38.04, "learning_rate": 7.416455696202532e-05, "loss": 0.2246, "step": 12400 }, { "epoch": 38.04, "eval_cer": 0.2879192692349352, "eval_loss": 2.1525843143463135, "eval_runtime": 59.7644, "eval_samples_per_second": 26.772, "eval_steps_per_second": 1.673, "eval_wer": 0.874070021881838, "step": 12400 }, { "epoch": 39.26, "learning_rate": 6.656962025316455e-05, "loss": 0.2152, "step": 12800 }, { "epoch": 39.26, "eval_cer": 0.28893020904791133, "eval_loss": 2.263143301010132, "eval_runtime": 60.4724, "eval_samples_per_second": 26.458, "eval_steps_per_second": 1.654, "eval_wer": 0.8789934354485777, "step": 12800 }, { "epoch": 40.49, "learning_rate": 5.897468354430379e-05, "loss": 0.212, "step": 13200 }, { "epoch": 40.49, "eval_cer": 0.28430876990287757, "eval_loss": 2.2723913192749023, "eval_runtime": 60.2908, "eval_samples_per_second": 26.538, "eval_steps_per_second": 1.659, "eval_wer": 0.8660831509846827, "step": 13200 }, { "epoch": 41.72, "learning_rate": 5.1379746835443034e-05, "loss": 0.2044, "step": 13600 }, { "epoch": 41.72, "eval_cer": 0.2877929017583132, "eval_loss": 2.2438297271728516, "eval_runtime": 70.0914, "eval_samples_per_second": 22.827, "eval_steps_per_second": 1.427, "eval_wer": 0.8691466083150985, "step": 13600 }, { "epoch": 42.94, "learning_rate": 4.380379746835442e-05, "loss": 0.2029, "step": 14000 }, { "epoch": 42.94, "eval_cer": 0.28327977759324113, "eval_loss": 2.2518999576568604, "eval_runtime": 69.2505, "eval_samples_per_second": 23.105, "eval_steps_per_second": 1.444, "eval_wer": 0.8576586433260394, "step": 14000 }, { "epoch": 44.17, "learning_rate": 3.6208860759493666e-05, "loss": 0.1972, "step": 14400 }, { "epoch": 44.17, "eval_cer": 0.28133010795393004, "eval_loss": 2.2697150707244873, "eval_runtime": 68.5533, "eval_samples_per_second": 23.34, "eval_steps_per_second": 1.459, "eval_wer": 0.8603938730853392, "step": 14400 }, { "epoch": 45.4, "learning_rate": 2.861392405063291e-05, "loss": 0.1884, "step": 14800 }, { "epoch": 45.4, "eval_cer": 0.2846878723327436, "eval_loss": 2.3294308185577393, "eval_runtime": 60.5028, "eval_samples_per_second": 26.445, "eval_steps_per_second": 1.653, "eval_wer": 0.8661925601750547, "step": 14800 }, { "epoch": 46.63, "learning_rate": 2.101898734177215e-05, "loss": 0.1877, "step": 15200 }, { "epoch": 46.63, "eval_cer": 0.2792721233346572, "eval_loss": 2.3077094554901123, "eval_runtime": 60.8923, "eval_samples_per_second": 26.276, "eval_steps_per_second": 1.642, "eval_wer": 0.8561269146608315, "step": 15200 }, { "epoch": 47.85, "learning_rate": 1.3424050632911391e-05, "loss": 0.1871, "step": 15600 }, { "epoch": 47.85, "eval_cer": 0.2801025381810304, "eval_loss": 2.351794481277466, "eval_runtime": 60.3104, "eval_samples_per_second": 26.529, "eval_steps_per_second": 1.658, "eval_wer": 0.8563457330415755, "step": 15600 }, { "epoch": 49.08, "learning_rate": 5.829113924050632e-06, "loss": 0.1838, "step": 16000 }, { "epoch": 49.08, "eval_cer": 0.27992201321442756, "eval_loss": 2.3462095260620117, "eval_runtime": 60.2933, "eval_samples_per_second": 26.537, "eval_steps_per_second": 1.659, "eval_wer": 0.8555798687089715, "step": 16000 } ], "max_steps": 16300, "num_train_epochs": 50, "total_flos": 6.654066355887293e+19, "trial_name": null, "trial_params": null }