{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.98823529411764, "global_step": 6300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.78, "learning_rate": 4.9000000000000005e-06, "loss": 11.1133, "step": 50 }, { "epoch": 1.58, "learning_rate": 9.900000000000002e-06, "loss": 3.3967, "step": 100 }, { "epoch": 2.38, "learning_rate": 1.49e-05, "loss": 3.2205, "step": 150 }, { "epoch": 3.17, "learning_rate": 1.9900000000000003e-05, "loss": 2.8143, "step": 200 }, { "epoch": 3.96, "learning_rate": 2.4900000000000002e-05, "loss": 1.9249, "step": 250 }, { "epoch": 4.75, "learning_rate": 2.9900000000000002e-05, "loss": 1.6708, "step": 300 }, { "epoch": 5.55, "learning_rate": 3.49e-05, "loss": 1.5501, "step": 350 }, { "epoch": 6.35, "learning_rate": 3.99e-05, "loss": 1.4258, "step": 400 }, { "epoch": 7.14, "learning_rate": 4.49e-05, "loss": 1.332, "step": 450 }, { "epoch": 7.93, "learning_rate": 4.99e-05, "loss": 1.2815, "step": 500 }, { "epoch": 7.93, "eval_cer": 0.10093122852447588, "eval_loss": 0.35359087586402893, "eval_runtime": 257.0651, "eval_samples_per_second": 16.852, "eval_steps_per_second": 2.108, "eval_wer": 0.47525724236188066, "step": 500 }, { "epoch": 8.72, "learning_rate": 4.957758620689655e-05, "loss": 1.2632, "step": 550 }, { "epoch": 9.52, "learning_rate": 4.9146551724137934e-05, "loss": 1.2239, "step": 600 }, { "epoch": 10.31, "learning_rate": 4.871551724137931e-05, "loss": 1.2044, "step": 650 }, { "epoch": 11.11, "learning_rate": 4.828448275862069e-05, "loss": 1.1918, "step": 700 }, { "epoch": 11.89, "learning_rate": 4.785344827586207e-05, "loss": 1.1641, "step": 750 }, { "epoch": 12.69, "learning_rate": 4.742241379310345e-05, "loss": 1.1718, "step": 800 }, { "epoch": 13.49, "learning_rate": 4.699137931034483e-05, "loss": 1.1638, "step": 850 }, { "epoch": 14.28, "learning_rate": 4.656034482758621e-05, "loss": 1.1317, "step": 900 }, { "epoch": 15.08, "learning_rate": 4.612931034482759e-05, "loss": 1.1334, "step": 950 }, { "epoch": 15.86, "learning_rate": 4.569827586206897e-05, "loss": 1.0869, "step": 1000 }, { "epoch": 15.86, "eval_cer": 0.06135152631841044, "eval_loss": 0.23165984451770782, "eval_runtime": 262.4285, "eval_samples_per_second": 16.507, "eval_steps_per_second": 2.065, "eval_wer": 0.3110653791356657, "step": 1000 }, { "epoch": 16.66, "learning_rate": 4.526724137931035e-05, "loss": 1.104, "step": 1050 }, { "epoch": 17.45, "learning_rate": 4.4836206896551726e-05, "loss": 1.109, "step": 1100 }, { "epoch": 18.25, "learning_rate": 4.440517241379311e-05, "loss": 1.0902, "step": 1150 }, { "epoch": 19.05, "learning_rate": 4.397413793103449e-05, "loss": 1.0676, "step": 1200 }, { "epoch": 19.83, "learning_rate": 4.3543103448275865e-05, "loss": 1.0453, "step": 1250 }, { "epoch": 20.63, "learning_rate": 4.311206896551725e-05, "loss": 1.0489, "step": 1300 }, { "epoch": 21.42, "learning_rate": 4.268103448275862e-05, "loss": 1.0495, "step": 1350 }, { "epoch": 22.22, "learning_rate": 4.2250000000000004e-05, "loss": 1.0325, "step": 1400 }, { "epoch": 23.02, "learning_rate": 4.181896551724138e-05, "loss": 1.0298, "step": 1450 }, { "epoch": 23.8, "learning_rate": 4.138793103448276e-05, "loss": 0.9984, "step": 1500 }, { "epoch": 23.8, "eval_cer": 0.052054180568696776, "eval_loss": 0.20215292274951935, "eval_runtime": 259.0562, "eval_samples_per_second": 16.722, "eval_steps_per_second": 2.092, "eval_wer": 0.26762703815102107, "step": 1500 }, { "epoch": 24.6, "learning_rate": 4.0956896551724136e-05, "loss": 1.0118, "step": 1550 }, { "epoch": 25.39, "learning_rate": 4.053448275862069e-05, "loss": 1.0165, "step": 1600 }, { "epoch": 26.19, "learning_rate": 4.0103448275862074e-05, "loss": 1.0075, "step": 1650 }, { "epoch": 26.97, "learning_rate": 3.967241379310345e-05, "loss": 1.003, "step": 1700 }, { "epoch": 27.77, "learning_rate": 3.924137931034483e-05, "loss": 0.9905, "step": 1750 }, { "epoch": 28.56, "learning_rate": 3.8810344827586206e-05, "loss": 1.0019, "step": 1800 }, { "epoch": 29.36, "learning_rate": 3.837931034482759e-05, "loss": 1.0085, "step": 1850 }, { "epoch": 30.16, "learning_rate": 3.794827586206896e-05, "loss": 0.9868, "step": 1900 }, { "epoch": 30.94, "learning_rate": 3.7517241379310345e-05, "loss": 0.9816, "step": 1950 }, { "epoch": 31.74, "learning_rate": 3.708620689655173e-05, "loss": 0.975, "step": 2000 }, { "epoch": 31.74, "eval_cer": 0.04868781435187491, "eval_loss": 0.19483695924282074, "eval_runtime": 259.0795, "eval_samples_per_second": 16.721, "eval_steps_per_second": 2.092, "eval_wer": 0.24688934620864333, "step": 2000 }, { "epoch": 32.53, "learning_rate": 3.66551724137931e-05, "loss": 0.9552, "step": 2050 }, { "epoch": 33.33, "learning_rate": 3.6224137931034484e-05, "loss": 0.9649, "step": 2100 }, { "epoch": 34.13, "learning_rate": 3.5793103448275866e-05, "loss": 0.9632, "step": 2150 }, { "epoch": 34.91, "learning_rate": 3.536206896551724e-05, "loss": 0.9542, "step": 2200 }, { "epoch": 35.71, "learning_rate": 3.493103448275862e-05, "loss": 0.9686, "step": 2250 }, { "epoch": 36.5, "learning_rate": 3.45e-05, "loss": 0.9418, "step": 2300 }, { "epoch": 37.3, "learning_rate": 3.406896551724138e-05, "loss": 0.9295, "step": 2350 }, { "epoch": 38.09, "learning_rate": 3.363793103448276e-05, "loss": 0.9372, "step": 2400 }, { "epoch": 38.88, "learning_rate": 3.320689655172414e-05, "loss": 0.9205, "step": 2450 }, { "epoch": 39.67, "learning_rate": 3.277586206896552e-05, "loss": 0.9306, "step": 2500 }, { "epoch": 39.67, "eval_cer": 0.046377172451571136, "eval_loss": 0.19161736965179443, "eval_runtime": 258.3157, "eval_samples_per_second": 16.77, "eval_steps_per_second": 2.098, "eval_wer": 0.2377394332752889, "step": 2500 }, { "epoch": 40.47, "learning_rate": 3.23448275862069e-05, "loss": 0.9331, "step": 2550 }, { "epoch": 41.27, "learning_rate": 3.1913793103448276e-05, "loss": 0.8936, "step": 2600 }, { "epoch": 42.06, "learning_rate": 3.148275862068966e-05, "loss": 0.8987, "step": 2650 }, { "epoch": 42.85, "learning_rate": 3.105172413793104e-05, "loss": 0.8853, "step": 2700 }, { "epoch": 43.64, "learning_rate": 3.0620689655172415e-05, "loss": 0.9106, "step": 2750 }, { "epoch": 44.44, "learning_rate": 3.0189655172413794e-05, "loss": 0.8932, "step": 2800 }, { "epoch": 45.24, "learning_rate": 2.9758620689655176e-05, "loss": 0.9096, "step": 2850 }, { "epoch": 46.03, "learning_rate": 2.932758620689655e-05, "loss": 0.8919, "step": 2900 }, { "epoch": 46.82, "learning_rate": 2.8896551724137933e-05, "loss": 0.8744, "step": 2950 }, { "epoch": 47.61, "learning_rate": 2.8465517241379315e-05, "loss": 0.8868, "step": 3000 }, { "epoch": 47.61, "eval_cer": 0.04391713560081669, "eval_loss": 0.19031885266304016, "eval_runtime": 265.4438, "eval_samples_per_second": 16.32, "eval_steps_per_second": 2.042, "eval_wer": 0.2257400664872566, "step": 3000 }, { "epoch": 48.41, "learning_rate": 2.803448275862069e-05, "loss": 0.8793, "step": 3050 }, { "epoch": 49.2, "learning_rate": 2.7603448275862072e-05, "loss": 0.8739, "step": 3100 }, { "epoch": 49.99, "learning_rate": 2.717241379310345e-05, "loss": 0.8696, "step": 3150 }, { "epoch": 50.78, "learning_rate": 2.674137931034483e-05, "loss": 0.863, "step": 3200 }, { "epoch": 51.58, "learning_rate": 2.6310344827586207e-05, "loss": 0.8612, "step": 3250 }, { "epoch": 52.38, "learning_rate": 2.587931034482759e-05, "loss": 0.8639, "step": 3300 }, { "epoch": 53.17, "learning_rate": 2.5448275862068964e-05, "loss": 0.8523, "step": 3350 }, { "epoch": 53.96, "learning_rate": 2.5017241379310346e-05, "loss": 0.8577, "step": 3400 }, { "epoch": 54.75, "learning_rate": 2.4586206896551725e-05, "loss": 0.8465, "step": 3450 }, { "epoch": 55.55, "learning_rate": 2.4155172413793103e-05, "loss": 0.8424, "step": 3500 }, { "epoch": 55.55, "eval_cer": 0.042293710472586024, "eval_loss": 0.17861121892929077, "eval_runtime": 259.0288, "eval_samples_per_second": 16.724, "eval_steps_per_second": 2.092, "eval_wer": 0.22061104954883648, "step": 3500 }, { "epoch": 56.35, "learning_rate": 2.3724137931034485e-05, "loss": 0.8436, "step": 3550 }, { "epoch": 57.14, "learning_rate": 2.3293103448275864e-05, "loss": 0.8404, "step": 3600 }, { "epoch": 57.93, "learning_rate": 2.2862068965517242e-05, "loss": 0.8304, "step": 3650 }, { "epoch": 58.72, "learning_rate": 2.2431034482758624e-05, "loss": 0.8331, "step": 3700 }, { "epoch": 59.52, "learning_rate": 2.2000000000000003e-05, "loss": 0.824, "step": 3750 }, { "epoch": 60.31, "learning_rate": 2.1568965517241378e-05, "loss": 0.8328, "step": 3800 }, { "epoch": 61.11, "learning_rate": 2.113793103448276e-05, "loss": 0.8234, "step": 3850 }, { "epoch": 61.89, "learning_rate": 2.070689655172414e-05, "loss": 0.8098, "step": 3900 }, { "epoch": 62.69, "learning_rate": 2.0275862068965517e-05, "loss": 0.8287, "step": 3950 }, { "epoch": 63.49, "learning_rate": 1.98448275862069e-05, "loss": 0.8126, "step": 4000 }, { "epoch": 63.49, "eval_cer": 0.04164135252228475, "eval_loss": 0.18486249446868896, "eval_runtime": 261.7127, "eval_samples_per_second": 16.553, "eval_steps_per_second": 2.071, "eval_wer": 0.2159886021845813, "step": 4000 }, { "epoch": 64.28, "learning_rate": 1.9413793103448277e-05, "loss": 0.8089, "step": 4050 }, { "epoch": 65.08, "learning_rate": 1.8982758620689656e-05, "loss": 0.8126, "step": 4100 }, { "epoch": 65.86, "learning_rate": 1.8551724137931034e-05, "loss": 0.7975, "step": 4150 }, { "epoch": 66.66, "learning_rate": 1.8120689655172416e-05, "loss": 0.8049, "step": 4200 }, { "epoch": 67.45, "learning_rate": 1.7698275862068966e-05, "loss": 0.8088, "step": 4250 }, { "epoch": 68.25, "learning_rate": 1.7267241379310344e-05, "loss": 0.8038, "step": 4300 }, { "epoch": 69.05, "learning_rate": 1.6836206896551726e-05, "loss": 0.7886, "step": 4350 }, { "epoch": 69.83, "learning_rate": 1.6405172413793105e-05, "loss": 0.7735, "step": 4400 }, { "epoch": 70.63, "learning_rate": 1.5974137931034483e-05, "loss": 0.7837, "step": 4450 }, { "epoch": 71.42, "learning_rate": 1.5543103448275865e-05, "loss": 0.7901, "step": 4500 }, { "epoch": 71.42, "eval_cer": 0.04126786514615806, "eval_loss": 0.18691900372505188, "eval_runtime": 261.5464, "eval_samples_per_second": 16.563, "eval_steps_per_second": 2.072, "eval_wer": 0.21383568149438023, "step": 4500 }, { "epoch": 72.22, "learning_rate": 1.5112068965517242e-05, "loss": 0.7949, "step": 4550 }, { "epoch": 73.02, "learning_rate": 1.468103448275862e-05, "loss": 0.7893, "step": 4600 }, { "epoch": 73.8, "learning_rate": 1.4249999999999999e-05, "loss": 0.7603, "step": 4650 }, { "epoch": 74.6, "learning_rate": 1.3818965517241381e-05, "loss": 0.776, "step": 4700 }, { "epoch": 75.39, "learning_rate": 1.338793103448276e-05, "loss": 0.7755, "step": 4750 }, { "epoch": 76.19, "learning_rate": 1.2956896551724138e-05, "loss": 0.7751, "step": 4800 }, { "epoch": 76.97, "learning_rate": 1.2525862068965518e-05, "loss": 0.7608, "step": 4850 }, { "epoch": 77.77, "learning_rate": 1.2094827586206897e-05, "loss": 0.7663, "step": 4900 }, { "epoch": 78.56, "learning_rate": 1.1663793103448277e-05, "loss": 0.7656, "step": 4950 }, { "epoch": 79.36, "learning_rate": 1.1232758620689656e-05, "loss": 0.7671, "step": 5000 }, { "epoch": 79.36, "eval_cer": 0.03937054927543449, "eval_loss": 0.18550464510917664, "eval_runtime": 260.3539, "eval_samples_per_second": 16.639, "eval_steps_per_second": 2.082, "eval_wer": 0.20747190121893302, "step": 5000 }, { "epoch": 80.16, "learning_rate": 1.0801724137931036e-05, "loss": 0.7694, "step": 5050 }, { "epoch": 80.94, "learning_rate": 1.0370689655172414e-05, "loss": 0.7672, "step": 5100 }, { "epoch": 81.74, "learning_rate": 9.939655172413793e-06, "loss": 0.7444, "step": 5150 }, { "epoch": 82.53, "learning_rate": 9.508620689655173e-06, "loss": 0.7534, "step": 5200 }, { "epoch": 83.33, "learning_rate": 9.077586206896552e-06, "loss": 0.7453, "step": 5250 }, { "epoch": 84.13, "learning_rate": 8.646551724137932e-06, "loss": 0.7494, "step": 5300 }, { "epoch": 84.91, "learning_rate": 8.224137931034483e-06, "loss": 0.7425, "step": 5350 }, { "epoch": 85.71, "learning_rate": 7.793103448275863e-06, "loss": 0.7499, "step": 5400 }, { "epoch": 86.5, "learning_rate": 7.370689655172413e-06, "loss": 0.735, "step": 5450 }, { "epoch": 87.3, "learning_rate": 6.939655172413794e-06, "loss": 0.7467, "step": 5500 }, { "epoch": 87.3, "eval_cer": 0.03894228375080922, "eval_loss": 0.18841499090194702, "eval_runtime": 261.43, "eval_samples_per_second": 16.57, "eval_steps_per_second": 2.073, "eval_wer": 0.20490739274972297, "step": 5500 }, { "epoch": 88.09, "learning_rate": 6.508620689655173e-06, "loss": 0.7348, "step": 5550 }, { "epoch": 88.88, "learning_rate": 6.0775862068965515e-06, "loss": 0.7244, "step": 5600 }, { "epoch": 89.67, "learning_rate": 5.646551724137932e-06, "loss": 0.7394, "step": 5650 }, { "epoch": 90.47, "learning_rate": 5.21551724137931e-06, "loss": 0.7423, "step": 5700 }, { "epoch": 91.27, "learning_rate": 4.78448275862069e-06, "loss": 0.7251, "step": 5750 }, { "epoch": 92.06, "learning_rate": 4.353448275862069e-06, "loss": 0.7304, "step": 5800 }, { "epoch": 92.85, "learning_rate": 3.9224137931034484e-06, "loss": 0.7153, "step": 5850 }, { "epoch": 93.64, "learning_rate": 3.491379310344828e-06, "loss": 0.7287, "step": 5900 }, { "epoch": 94.44, "learning_rate": 3.0603448275862068e-06, "loss": 0.7349, "step": 5950 }, { "epoch": 95.24, "learning_rate": 2.6293103448275866e-06, "loss": 0.731, "step": 6000 }, { "epoch": 95.24, "eval_cer": 0.03871819132513321, "eval_loss": 0.1877404898405075, "eval_runtime": 259.3367, "eval_samples_per_second": 16.704, "eval_steps_per_second": 2.09, "eval_wer": 0.2059838530948235, "step": 6000 }, { "epoch": 96.03, "learning_rate": 2.1982758620689655e-06, "loss": 0.7151, "step": 6050 }, { "epoch": 96.82, "learning_rate": 1.7672413793103449e-06, "loss": 0.713, "step": 6100 }, { "epoch": 97.61, "learning_rate": 1.3362068965517243e-06, "loss": 0.7257, "step": 6150 }, { "epoch": 98.41, "learning_rate": 9.051724137931035e-07, "loss": 0.7287, "step": 6200 }, { "epoch": 99.2, "learning_rate": 4.7413793103448276e-07, "loss": 0.7273, "step": 6250 }, { "epoch": 99.99, "learning_rate": 4.310344827586207e-08, "loss": 0.7082, "step": 6300 }, { "epoch": 99.99, "step": 6300, "total_flos": 4.0887685530877926e+20, "train_loss": 1.049089940994505, "train_runtime": 95054.1856, "train_samples_per_second": 10.723, "train_steps_per_second": 0.066 } ], "max_steps": 6300, "num_train_epochs": 100, "total_flos": 4.0887685530877926e+20, "trial_name": null, "trial_params": null }