{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 11300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.88, "learning_rate": 5.82e-05, "loss": 10.446, "step": 100 }, { "epoch": 1.77, "learning_rate": 0.0001182, "loss": 3.652, "step": 200 }, { "epoch": 2.65, "learning_rate": 0.00017819999999999997, "loss": 3.2102, "step": 300 }, { "epoch": 3.54, "learning_rate": 0.0002382, "loss": 3.1322, "step": 400 }, { "epoch": 4.42, "learning_rate": 0.0002982, "loss": 3.0798, "step": 500 }, { "epoch": 4.42, "eval_loss": 3.001023054122925, "eval_runtime": 56.1867, "eval_samples_per_second": 30.31, "eval_steps_per_second": 0.961, "eval_wer": 1.0011981566820276, "step": 500 }, { "epoch": 5.31, "learning_rate": 0.00029730555555555554, "loss": 2.5544, "step": 600 }, { "epoch": 6.19, "learning_rate": 0.0002945277777777777, "loss": 1.8568, "step": 700 }, { "epoch": 7.08, "learning_rate": 0.00029174999999999996, "loss": 1.6575, "step": 800 }, { "epoch": 7.96, "learning_rate": 0.0002889722222222222, "loss": 1.5125, "step": 900 }, { "epoch": 8.85, "learning_rate": 0.0002861944444444444, "loss": 1.4336, "step": 1000 }, { "epoch": 8.85, "eval_loss": 0.8481376767158508, "eval_runtime": 54.9915, "eval_samples_per_second": 30.968, "eval_steps_per_second": 0.982, "eval_wer": 0.6910599078341014, "step": 1000 }, { "epoch": 9.73, "learning_rate": 0.0002834166666666666, "loss": 1.3709, "step": 1100 }, { "epoch": 10.62, "learning_rate": 0.00028063888888888886, "loss": 1.3058, "step": 1200 }, { "epoch": 11.5, "learning_rate": 0.0002778611111111111, "loss": 1.2604, "step": 1300 }, { "epoch": 12.39, "learning_rate": 0.0002750833333333333, "loss": 1.2065, "step": 1400 }, { "epoch": 13.27, "learning_rate": 0.00027230555555555553, "loss": 1.2062, "step": 1500 }, { "epoch": 13.27, "eval_loss": 0.7312180995941162, "eval_runtime": 55.5977, "eval_samples_per_second": 30.631, "eval_steps_per_second": 0.971, "eval_wer": 0.6332718894009216, "step": 1500 }, { "epoch": 14.16, "learning_rate": 0.00026952777777777777, "loss": 1.1712, "step": 1600 }, { "epoch": 15.04, "learning_rate": 0.00026674999999999995, "loss": 1.1348, "step": 1700 }, { "epoch": 15.93, "learning_rate": 0.0002639722222222222, "loss": 1.1077, "step": 1800 }, { "epoch": 16.81, "learning_rate": 0.00026119444444444443, "loss": 1.0821, "step": 1900 }, { "epoch": 17.7, "learning_rate": 0.0002584166666666666, "loss": 1.0481, "step": 2000 }, { "epoch": 17.7, "eval_loss": 0.6849815845489502, "eval_runtime": 54.2403, "eval_samples_per_second": 31.397, "eval_steps_per_second": 0.996, "eval_wer": 0.5358525345622119, "step": 2000 }, { "epoch": 18.58, "learning_rate": 0.0002556388888888889, "loss": 1.0495, "step": 2100 }, { "epoch": 19.47, "learning_rate": 0.0002528611111111111, "loss": 1.0119, "step": 2200 }, { "epoch": 20.35, "learning_rate": 0.0002500833333333333, "loss": 1.0156, "step": 2300 }, { "epoch": 21.24, "learning_rate": 0.00024730555555555557, "loss": 0.9916, "step": 2400 }, { "epoch": 22.12, "learning_rate": 0.00024452777777777776, "loss": 0.9837, "step": 2500 }, { "epoch": 22.12, "eval_loss": 0.6336787343025208, "eval_runtime": 55.0498, "eval_samples_per_second": 30.936, "eval_steps_per_second": 0.981, "eval_wer": 0.5316129032258065, "step": 2500 }, { "epoch": 23.01, "learning_rate": 0.00024174999999999997, "loss": 0.9558, "step": 2600 }, { "epoch": 23.89, "learning_rate": 0.0002389722222222222, "loss": 0.9523, "step": 2700 }, { "epoch": 24.78, "learning_rate": 0.00023619444444444442, "loss": 0.946, "step": 2800 }, { "epoch": 25.66, "learning_rate": 0.00023341666666666663, "loss": 0.909, "step": 2900 }, { "epoch": 26.55, "learning_rate": 0.00023063888888888887, "loss": 0.9108, "step": 3000 }, { "epoch": 26.55, "eval_loss": 0.6257887482643127, "eval_runtime": 55.6823, "eval_samples_per_second": 30.584, "eval_steps_per_second": 0.97, "eval_wer": 0.507926267281106, "step": 3000 }, { "epoch": 27.43, "learning_rate": 0.00022786111111111108, "loss": 0.9005, "step": 3100 }, { "epoch": 28.32, "learning_rate": 0.00022508333333333332, "loss": 0.9069, "step": 3200 }, { "epoch": 29.2, "learning_rate": 0.00022230555555555553, "loss": 0.8605, "step": 3300 }, { "epoch": 30.09, "learning_rate": 0.00021952777777777774, "loss": 0.8815, "step": 3400 }, { "epoch": 30.97, "learning_rate": 0.00021674999999999998, "loss": 0.8439, "step": 3500 }, { "epoch": 30.97, "eval_loss": 0.6301265954971313, "eval_runtime": 54.5193, "eval_samples_per_second": 31.237, "eval_steps_per_second": 0.99, "eval_wer": 0.48884792626728113, "step": 3500 }, { "epoch": 31.86, "learning_rate": 0.0002139722222222222, "loss": 0.8522, "step": 3600 }, { "epoch": 32.74, "learning_rate": 0.0002111944444444444, "loss": 0.8477, "step": 3700 }, { "epoch": 33.63, "learning_rate": 0.00020841666666666665, "loss": 0.7978, "step": 3800 }, { "epoch": 34.51, "learning_rate": 0.00020563888888888886, "loss": 0.8127, "step": 3900 }, { "epoch": 35.4, "learning_rate": 0.0002028611111111111, "loss": 0.7901, "step": 4000 }, { "epoch": 35.4, "eval_loss": 0.6244927048683167, "eval_runtime": 53.6992, "eval_samples_per_second": 31.714, "eval_steps_per_second": 1.006, "eval_wer": 0.4976958525345622, "step": 4000 }, { "epoch": 36.28, "learning_rate": 0.0002000833333333333, "loss": 0.7978, "step": 4100 }, { "epoch": 37.17, "learning_rate": 0.00019730555555555552, "loss": 0.8046, "step": 4200 }, { "epoch": 38.05, "learning_rate": 0.00019452777777777776, "loss": 0.7892, "step": 4300 }, { "epoch": 38.94, "learning_rate": 0.00019174999999999997, "loss": 0.7657, "step": 4400 }, { "epoch": 39.82, "learning_rate": 0.00018899999999999999, "loss": 0.7669, "step": 4500 }, { "epoch": 39.82, "eval_loss": 0.6164370775222778, "eval_runtime": 54.5713, "eval_samples_per_second": 31.207, "eval_steps_per_second": 0.99, "eval_wer": 0.4671889400921659, "step": 4500 }, { "epoch": 40.71, "learning_rate": 0.0001862222222222222, "loss": 0.7389, "step": 4600 }, { "epoch": 41.59, "learning_rate": 0.00018344444444444444, "loss": 0.7595, "step": 4700 }, { "epoch": 42.48, "learning_rate": 0.00018066666666666665, "loss": 0.7264, "step": 4800 }, { "epoch": 43.36, "learning_rate": 0.00017788888888888886, "loss": 0.7251, "step": 4900 }, { "epoch": 44.25, "learning_rate": 0.0001751111111111111, "loss": 0.7196, "step": 5000 }, { "epoch": 44.25, "eval_loss": 0.6039230227470398, "eval_runtime": 55.8782, "eval_samples_per_second": 30.477, "eval_steps_per_second": 0.966, "eval_wer": 0.4688479262672811, "step": 5000 }, { "epoch": 45.13, "learning_rate": 0.0001723333333333333, "loss": 0.6932, "step": 5100 }, { "epoch": 46.02, "learning_rate": 0.00016955555555555555, "loss": 0.6983, "step": 5200 }, { "epoch": 46.9, "learning_rate": 0.00016677777777777776, "loss": 0.6769, "step": 5300 }, { "epoch": 47.79, "learning_rate": 0.00016399999999999997, "loss": 0.6928, "step": 5400 }, { "epoch": 48.67, "learning_rate": 0.00016122222222222221, "loss": 0.6715, "step": 5500 }, { "epoch": 48.67, "eval_loss": 0.5900057554244995, "eval_runtime": 53.576, "eval_samples_per_second": 31.787, "eval_steps_per_second": 1.008, "eval_wer": 0.45732718894009217, "step": 5500 }, { "epoch": 49.56, "learning_rate": 0.00015849999999999998, "loss": 0.6833, "step": 5600 }, { "epoch": 50.44, "learning_rate": 0.0001557222222222222, "loss": 0.6673, "step": 5700 }, { "epoch": 51.33, "learning_rate": 0.00015294444444444443, "loss": 0.6791, "step": 5800 }, { "epoch": 52.21, "learning_rate": 0.00015016666666666664, "loss": 0.6292, "step": 5900 }, { "epoch": 53.1, "learning_rate": 0.00014738888888888888, "loss": 0.6441, "step": 6000 }, { "epoch": 53.1, "eval_loss": 0.7002069354057312, "eval_runtime": 55.3769, "eval_samples_per_second": 30.753, "eval_steps_per_second": 0.975, "eval_wer": 0.479815668202765, "step": 6000 }, { "epoch": 53.98, "learning_rate": 0.0001446111111111111, "loss": 0.6217, "step": 6100 }, { "epoch": 54.87, "learning_rate": 0.00014183333333333333, "loss": 0.6131, "step": 6200 }, { "epoch": 55.75, "learning_rate": 0.00013905555555555554, "loss": 0.6211, "step": 6300 }, { "epoch": 56.64, "learning_rate": 0.00013627777777777775, "loss": 0.6104, "step": 6400 }, { "epoch": 57.52, "learning_rate": 0.0001335, "loss": 0.5938, "step": 6500 }, { "epoch": 57.52, "eval_loss": 0.6249451637268066, "eval_runtime": 55.668, "eval_samples_per_second": 30.592, "eval_steps_per_second": 0.97, "eval_wer": 0.4578801843317972, "step": 6500 }, { "epoch": 58.41, "learning_rate": 0.0001307222222222222, "loss": 0.6015, "step": 6600 }, { "epoch": 59.29, "learning_rate": 0.00012794444444444442, "loss": 0.591, "step": 6700 }, { "epoch": 60.18, "learning_rate": 0.00012516666666666666, "loss": 0.5734, "step": 6800 }, { "epoch": 61.06, "learning_rate": 0.0001223888888888889, "loss": 0.5685, "step": 6900 }, { "epoch": 61.95, "learning_rate": 0.0001196111111111111, "loss": 0.5541, "step": 7000 }, { "epoch": 61.95, "eval_loss": 0.6184473633766174, "eval_runtime": 55.2876, "eval_samples_per_second": 30.803, "eval_steps_per_second": 0.977, "eval_wer": 0.4424884792626728, "step": 7000 }, { "epoch": 62.83, "learning_rate": 0.00011683333333333332, "loss": 0.5546, "step": 7100 }, { "epoch": 63.72, "learning_rate": 0.00011405555555555554, "loss": 0.5473, "step": 7200 }, { "epoch": 64.6, "learning_rate": 0.00011127777777777777, "loss": 0.5592, "step": 7300 }, { "epoch": 65.49, "learning_rate": 0.0001085, "loss": 0.5349, "step": 7400 }, { "epoch": 66.37, "learning_rate": 0.0001057222222222222, "loss": 0.5506, "step": 7500 }, { "epoch": 66.37, "eval_loss": 0.6962713003158569, "eval_runtime": 55.4374, "eval_samples_per_second": 30.719, "eval_steps_per_second": 0.974, "eval_wer": 0.45852534562211983, "step": 7500 }, { "epoch": 67.26, "learning_rate": 0.00010294444444444443, "loss": 0.5313, "step": 7600 }, { "epoch": 68.14, "learning_rate": 0.00010016666666666666, "loss": 0.5267, "step": 7700 }, { "epoch": 69.03, "learning_rate": 9.738888888888888e-05, "loss": 0.5222, "step": 7800 }, { "epoch": 69.91, "learning_rate": 9.46111111111111e-05, "loss": 0.5101, "step": 7900 }, { "epoch": 70.8, "learning_rate": 9.183333333333332e-05, "loss": 0.4998, "step": 8000 }, { "epoch": 70.8, "eval_loss": 0.6778160333633423, "eval_runtime": 56.1738, "eval_samples_per_second": 30.317, "eval_steps_per_second": 0.961, "eval_wer": 0.44682027649769585, "step": 8000 }, { "epoch": 71.68, "learning_rate": 8.905555555555555e-05, "loss": 0.4941, "step": 8100 }, { "epoch": 72.57, "learning_rate": 8.627777777777776e-05, "loss": 0.492, "step": 8200 }, { "epoch": 73.45, "learning_rate": 8.349999999999998e-05, "loss": 0.4741, "step": 8300 }, { "epoch": 74.34, "learning_rate": 8.072222222222222e-05, "loss": 0.495, "step": 8400 }, { "epoch": 75.22, "learning_rate": 7.794444444444445e-05, "loss": 0.4729, "step": 8500 }, { "epoch": 75.22, "eval_loss": 0.6383044719696045, "eval_runtime": 53.9538, "eval_samples_per_second": 31.564, "eval_steps_per_second": 1.001, "eval_wer": 0.4392626728110599, "step": 8500 }, { "epoch": 76.11, "learning_rate": 7.516666666666665e-05, "loss": 0.4696, "step": 8600 }, { "epoch": 76.99, "learning_rate": 7.238888888888889e-05, "loss": 0.4581, "step": 8700 }, { "epoch": 77.88, "learning_rate": 6.961111111111111e-05, "loss": 0.4583, "step": 8800 }, { "epoch": 78.76, "learning_rate": 6.683333333333332e-05, "loss": 0.4451, "step": 8900 }, { "epoch": 79.65, "learning_rate": 6.405555555555555e-05, "loss": 0.4535, "step": 9000 }, { "epoch": 79.65, "eval_loss": 0.6592639684677124, "eval_runtime": 53.7527, "eval_samples_per_second": 31.682, "eval_steps_per_second": 1.005, "eval_wer": 0.4368663594470046, "step": 9000 }, { "epoch": 80.53, "learning_rate": 6.130555555555555e-05, "loss": 0.4324, "step": 9100 }, { "epoch": 81.42, "learning_rate": 5.8527777777777774e-05, "loss": 0.4546, "step": 9200 }, { "epoch": 82.3, "learning_rate": 5.574999999999999e-05, "loss": 0.4391, "step": 9300 }, { "epoch": 83.19, "learning_rate": 5.297222222222222e-05, "loss": 0.4306, "step": 9400 }, { "epoch": 84.07, "learning_rate": 5.019444444444444e-05, "loss": 0.4358, "step": 9500 }, { "epoch": 84.07, "eval_loss": 0.6913911700248718, "eval_runtime": 55.4673, "eval_samples_per_second": 30.703, "eval_steps_per_second": 0.974, "eval_wer": 0.4422119815668203, "step": 9500 }, { "epoch": 84.96, "learning_rate": 4.741666666666666e-05, "loss": 0.4095, "step": 9600 }, { "epoch": 85.84, "learning_rate": 4.463888888888888e-05, "loss": 0.4148, "step": 9700 }, { "epoch": 86.73, "learning_rate": 4.186111111111111e-05, "loss": 0.4113, "step": 9800 }, { "epoch": 87.61, "learning_rate": 3.9083333333333326e-05, "loss": 0.405, "step": 9900 }, { "epoch": 88.5, "learning_rate": 3.630555555555555e-05, "loss": 0.402, "step": 10000 }, { "epoch": 88.5, "eval_loss": 0.6743763089179993, "eval_runtime": 53.7113, "eval_samples_per_second": 31.707, "eval_steps_per_second": 1.005, "eval_wer": 0.4269124423963134, "step": 10000 }, { "epoch": 89.38, "learning_rate": 3.352777777777777e-05, "loss": 0.3915, "step": 10100 }, { "epoch": 90.27, "learning_rate": 3.0749999999999995e-05, "loss": 0.394, "step": 10200 }, { "epoch": 91.15, "learning_rate": 2.7972222222222217e-05, "loss": 0.392, "step": 10300 }, { "epoch": 92.04, "learning_rate": 2.519444444444444e-05, "loss": 0.3937, "step": 10400 }, { "epoch": 92.92, "learning_rate": 2.2416666666666665e-05, "loss": 0.3946, "step": 10500 }, { "epoch": 92.92, "eval_loss": 0.6894700527191162, "eval_runtime": 55.0145, "eval_samples_per_second": 30.955, "eval_steps_per_second": 0.982, "eval_wer": 0.42746543778801843, "step": 10500 }, { "epoch": 93.81, "learning_rate": 1.9638888888888887e-05, "loss": 0.3881, "step": 10600 }, { "epoch": 94.69, "learning_rate": 1.686111111111111e-05, "loss": 0.3778, "step": 10700 }, { "epoch": 95.58, "learning_rate": 1.4083333333333331e-05, "loss": 0.3783, "step": 10800 }, { "epoch": 96.46, "learning_rate": 1.1305555555555553e-05, "loss": 0.3612, "step": 10900 }, { "epoch": 97.35, "learning_rate": 8.527777777777777e-06, "loss": 0.3734, "step": 11000 }, { "epoch": 97.35, "eval_loss": 0.6888979077339172, "eval_runtime": 55.289, "eval_samples_per_second": 30.802, "eval_steps_per_second": 0.977, "eval_wer": 0.4319815668202765, "step": 11000 }, { "epoch": 98.23, "learning_rate": 5.749999999999999e-06, "loss": 0.3635, "step": 11100 }, { "epoch": 99.12, "learning_rate": 2.9999999999999997e-06, "loss": 0.3775, "step": 11200 }, { "epoch": 100.0, "learning_rate": 2.222222222222222e-07, "loss": 0.3737, "step": 11300 }, { "epoch": 100.0, "step": 11300, "total_flos": 4.438667922365353e+19, "train_loss": 0.9041624231254105, "train_runtime": 18573.3759, "train_samples_per_second": 19.388, "train_steps_per_second": 0.608 } ], "max_steps": 11300, "num_train_epochs": 100, "total_flos": 4.438667922365353e+19, "trial_name": null, "trial_params": null }