{ "best_metric": 80.47772163527792, "best_model_checkpoint": "./checkpoint-9000", "epoch": 30.67484662576687, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 2.2e-08, "loss": 1.4678, "step": 25 }, { "epoch": 0.15, "learning_rate": 4.7e-08, "loss": 1.4671, "step": 50 }, { "epoch": 0.23, "learning_rate": 7.2e-08, "loss": 1.4626, "step": 75 }, { "epoch": 0.31, "learning_rate": 9.7e-08, "loss": 1.4314, "step": 100 }, { "epoch": 0.38, "learning_rate": 1.2199999999999998e-07, "loss": 1.3735, "step": 125 }, { "epoch": 0.46, "learning_rate": 1.4699999999999998e-07, "loss": 1.3455, "step": 150 }, { "epoch": 0.54, "learning_rate": 1.7199999999999998e-07, "loss": 1.3064, "step": 175 }, { "epoch": 0.61, "learning_rate": 1.97e-07, "loss": 1.2316, "step": 200 }, { "epoch": 0.69, "learning_rate": 2.22e-07, "loss": 1.1693, "step": 225 }, { "epoch": 0.77, "learning_rate": 2.47e-07, "loss": 1.1127, "step": 250 }, { "epoch": 0.84, "learning_rate": 2.72e-07, "loss": 1.0361, "step": 275 }, { "epoch": 0.92, "learning_rate": 2.9699999999999997e-07, "loss": 0.9756, "step": 300 }, { "epoch": 1.0, "learning_rate": 3.22e-07, "loss": 0.9191, "step": 325 }, { "epoch": 1.07, "learning_rate": 3.4699999999999997e-07, "loss": 0.8692, "step": 350 }, { "epoch": 1.15, "learning_rate": 3.72e-07, "loss": 0.8278, "step": 375 }, { "epoch": 1.23, "learning_rate": 3.97e-07, "loss": 0.822, "step": 400 }, { "epoch": 1.3, "learning_rate": 4.2199999999999994e-07, "loss": 0.767, "step": 425 }, { "epoch": 1.38, "learning_rate": 4.4699999999999997e-07, "loss": 0.7674, "step": 450 }, { "epoch": 1.46, "learning_rate": 4.7199999999999994e-07, "loss": 0.7284, "step": 475 }, { "epoch": 1.53, "learning_rate": 4.97e-07, "loss": 0.7392, "step": 500 }, { "epoch": 1.53, "eval_loss": 0.8622996807098389, "eval_runtime": 1140.8248, "eval_samples_per_second": 9.174, "eval_steps_per_second": 0.288, "eval_wer": 100.81331567996973, "step": 500 }, { "epoch": 1.61, "learning_rate": 4.988421052631579e-07, "loss": 0.7281, "step": 525 }, { "epoch": 1.69, "learning_rate": 4.975263157894737e-07, "loss": 0.6999, "step": 550 }, { "epoch": 1.76, "learning_rate": 4.962105263157894e-07, "loss": 0.7069, "step": 575 }, { "epoch": 1.84, "learning_rate": 4.948947368421052e-07, "loss": 0.6858, "step": 600 }, { "epoch": 1.92, "learning_rate": 4.93578947368421e-07, "loss": 0.6881, "step": 625 }, { "epoch": 1.99, "learning_rate": 4.922631578947368e-07, "loss": 0.6889, "step": 650 }, { "epoch": 2.07, "learning_rate": 4.909473684210526e-07, "loss": 0.6509, "step": 675 }, { "epoch": 2.15, "learning_rate": 4.896315789473684e-07, "loss": 0.6679, "step": 700 }, { "epoch": 2.22, "learning_rate": 4.883157894736842e-07, "loss": 0.647, "step": 725 }, { "epoch": 2.3, "learning_rate": 4.87e-07, "loss": 0.6257, "step": 750 }, { "epoch": 2.38, "learning_rate": 4.856842105263157e-07, "loss": 0.6503, "step": 775 }, { "epoch": 2.45, "learning_rate": 4.843684210526315e-07, "loss": 0.6444, "step": 800 }, { "epoch": 2.53, "learning_rate": 4.830526315789473e-07, "loss": 0.6248, "step": 825 }, { "epoch": 2.61, "learning_rate": 4.817368421052631e-07, "loss": 0.6189, "step": 850 }, { "epoch": 2.68, "learning_rate": 4.804210526315789e-07, "loss": 0.6117, "step": 875 }, { "epoch": 2.76, "learning_rate": 4.791052631578947e-07, "loss": 0.616, "step": 900 }, { "epoch": 2.84, "learning_rate": 4.777894736842105e-07, "loss": 0.6149, "step": 925 }, { "epoch": 2.91, "learning_rate": 4.764736842105263e-07, "loss": 0.6161, "step": 950 }, { "epoch": 2.99, "learning_rate": 4.7515789473684207e-07, "loss": 0.5894, "step": 975 }, { "epoch": 3.07, "learning_rate": 4.7384210526315786e-07, "loss": 0.5938, "step": 1000 }, { "epoch": 3.07, "eval_loss": 0.7397418022155762, "eval_runtime": 1026.1036, "eval_samples_per_second": 10.2, "eval_steps_per_second": 0.32, "eval_wer": 93.66505444621579, "step": 1000 }, { "epoch": 3.14, "learning_rate": 4.7252631578947365e-07, "loss": 0.5896, "step": 1025 }, { "epoch": 3.22, "learning_rate": 4.7121052631578944e-07, "loss": 0.5836, "step": 1050 }, { "epoch": 3.3, "learning_rate": 4.6989473684210524e-07, "loss": 0.5846, "step": 1075 }, { "epoch": 3.37, "learning_rate": 4.6857894736842103e-07, "loss": 0.5738, "step": 1100 }, { "epoch": 3.45, "learning_rate": 4.672631578947368e-07, "loss": 0.5675, "step": 1125 }, { "epoch": 3.53, "learning_rate": 4.659473684210526e-07, "loss": 0.5737, "step": 1150 }, { "epoch": 3.6, "learning_rate": 4.646315789473684e-07, "loss": 0.5742, "step": 1175 }, { "epoch": 3.68, "learning_rate": 4.633157894736842e-07, "loss": 0.5902, "step": 1200 }, { "epoch": 3.76, "learning_rate": 4.62e-07, "loss": 0.573, "step": 1225 }, { "epoch": 3.83, "learning_rate": 4.6068421052631577e-07, "loss": 0.5686, "step": 1250 }, { "epoch": 3.91, "learning_rate": 4.5936842105263156e-07, "loss": 0.5673, "step": 1275 }, { "epoch": 3.99, "learning_rate": 4.5805263157894735e-07, "loss": 0.5452, "step": 1300 }, { "epoch": 4.06, "learning_rate": 4.5673684210526314e-07, "loss": 0.5624, "step": 1325 }, { "epoch": 4.14, "learning_rate": 4.5542105263157894e-07, "loss": 0.5435, "step": 1350 }, { "epoch": 4.22, "learning_rate": 4.5410526315789473e-07, "loss": 0.5538, "step": 1375 }, { "epoch": 4.29, "learning_rate": 4.527894736842105e-07, "loss": 0.5677, "step": 1400 }, { "epoch": 4.37, "learning_rate": 4.514736842105263e-07, "loss": 0.5333, "step": 1425 }, { "epoch": 4.45, "learning_rate": 4.5015789473684205e-07, "loss": 0.5482, "step": 1450 }, { "epoch": 4.52, "learning_rate": 4.4884210526315784e-07, "loss": 0.5304, "step": 1475 }, { "epoch": 4.6, "learning_rate": 4.4752631578947363e-07, "loss": 0.5388, "step": 1500 }, { "epoch": 4.6, "eval_loss": 0.6952596306800842, "eval_runtime": 1013.9022, "eval_samples_per_second": 10.322, "eval_steps_per_second": 0.324, "eval_wer": 92.30052149477153, "step": 1500 }, { "epoch": 4.68, "learning_rate": 4.462105263157894e-07, "loss": 0.5207, "step": 1525 }, { "epoch": 4.75, "learning_rate": 4.4489473684210526e-07, "loss": 0.5255, "step": 1550 }, { "epoch": 4.83, "learning_rate": 4.4357894736842105e-07, "loss": 0.5491, "step": 1575 }, { "epoch": 4.91, "learning_rate": 4.4226315789473685e-07, "loss": 0.5275, "step": 1600 }, { "epoch": 4.98, "learning_rate": 4.4094736842105264e-07, "loss": 0.5493, "step": 1625 }, { "epoch": 5.06, "learning_rate": 4.3963157894736843e-07, "loss": 0.5299, "step": 1650 }, { "epoch": 5.14, "learning_rate": 4.383157894736842e-07, "loss": 0.5211, "step": 1675 }, { "epoch": 5.21, "learning_rate": 4.3699999999999996e-07, "loss": 0.5179, "step": 1700 }, { "epoch": 5.29, "learning_rate": 4.3568421052631575e-07, "loss": 0.5311, "step": 1725 }, { "epoch": 5.37, "learning_rate": 4.3436842105263154e-07, "loss": 0.5316, "step": 1750 }, { "epoch": 5.44, "learning_rate": 4.3305263157894733e-07, "loss": 0.5113, "step": 1775 }, { "epoch": 5.52, "learning_rate": 4.317368421052631e-07, "loss": 0.5066, "step": 1800 }, { "epoch": 5.6, "learning_rate": 4.304210526315789e-07, "loss": 0.5082, "step": 1825 }, { "epoch": 5.67, "learning_rate": 4.291052631578947e-07, "loss": 0.5041, "step": 1850 }, { "epoch": 5.75, "learning_rate": 4.2778947368421055e-07, "loss": 0.5305, "step": 1875 }, { "epoch": 5.83, "learning_rate": 4.2647368421052634e-07, "loss": 0.502, "step": 1900 }, { "epoch": 5.9, "learning_rate": 4.2515789473684213e-07, "loss": 0.5229, "step": 1925 }, { "epoch": 5.98, "learning_rate": 4.2384210526315787e-07, "loss": 0.5048, "step": 1950 }, { "epoch": 6.06, "learning_rate": 4.2252631578947366e-07, "loss": 0.5063, "step": 1975 }, { "epoch": 6.13, "learning_rate": 4.2121052631578945e-07, "loss": 0.4982, "step": 2000 }, { "epoch": 6.13, "eval_loss": 0.6682108640670776, "eval_runtime": 1000.5318, "eval_samples_per_second": 10.46, "eval_steps_per_second": 0.328, "eval_wer": 88.93917695695642, "step": 2000 }, { "epoch": 6.21, "learning_rate": 4.1989473684210524e-07, "loss": 0.4973, "step": 2025 }, { "epoch": 6.29, "learning_rate": 4.1857894736842103e-07, "loss": 0.4864, "step": 2050 }, { "epoch": 6.37, "learning_rate": 4.172631578947368e-07, "loss": 0.5065, "step": 2075 }, { "epoch": 6.44, "learning_rate": 4.159473684210526e-07, "loss": 0.5117, "step": 2100 }, { "epoch": 6.52, "learning_rate": 4.146315789473684e-07, "loss": 0.4927, "step": 2125 }, { "epoch": 6.6, "learning_rate": 4.133157894736842e-07, "loss": 0.4957, "step": 2150 }, { "epoch": 6.67, "learning_rate": 4.12e-07, "loss": 0.4933, "step": 2175 }, { "epoch": 6.75, "learning_rate": 4.106842105263157e-07, "loss": 0.4908, "step": 2200 }, { "epoch": 6.83, "learning_rate": 4.0936842105263157e-07, "loss": 0.4925, "step": 2225 }, { "epoch": 6.9, "learning_rate": 4.0805263157894736e-07, "loss": 0.4896, "step": 2250 }, { "epoch": 6.98, "learning_rate": 4.0673684210526315e-07, "loss": 0.4895, "step": 2275 }, { "epoch": 7.06, "learning_rate": 4.0542105263157894e-07, "loss": 0.4854, "step": 2300 }, { "epoch": 7.13, "learning_rate": 4.0410526315789473e-07, "loss": 0.4833, "step": 2325 }, { "epoch": 7.21, "learning_rate": 4.027894736842105e-07, "loss": 0.4865, "step": 2350 }, { "epoch": 7.29, "learning_rate": 4.014736842105263e-07, "loss": 0.479, "step": 2375 }, { "epoch": 7.36, "learning_rate": 4.001578947368421e-07, "loss": 0.4846, "step": 2400 }, { "epoch": 7.44, "learning_rate": 3.988421052631579e-07, "loss": 0.476, "step": 2425 }, { "epoch": 7.52, "learning_rate": 3.9752631578947363e-07, "loss": 0.4839, "step": 2450 }, { "epoch": 7.59, "learning_rate": 3.962105263157894e-07, "loss": 0.4812, "step": 2475 }, { "epoch": 7.67, "learning_rate": 3.948947368421052e-07, "loss": 0.4795, "step": 2500 }, { "epoch": 7.67, "eval_loss": 0.6512458324432373, "eval_runtime": 980.8976, "eval_samples_per_second": 10.67, "eval_steps_per_second": 0.334, "eval_wer": 90.15239536329001, "step": 2500 }, { "epoch": 7.75, "learning_rate": 3.93578947368421e-07, "loss": 0.4687, "step": 2525 }, { "epoch": 7.82, "learning_rate": 3.9226315789473685e-07, "loss": 0.478, "step": 2550 }, { "epoch": 7.9, "learning_rate": 3.9094736842105264e-07, "loss": 0.4621, "step": 2575 }, { "epoch": 7.98, "learning_rate": 3.8963157894736843e-07, "loss": 0.4774, "step": 2600 }, { "epoch": 8.05, "learning_rate": 3.883157894736842e-07, "loss": 0.4712, "step": 2625 }, { "epoch": 8.13, "learning_rate": 3.87e-07, "loss": 0.4589, "step": 2650 }, { "epoch": 8.21, "learning_rate": 3.856842105263158e-07, "loss": 0.4565, "step": 2675 }, { "epoch": 8.28, "learning_rate": 3.8436842105263154e-07, "loss": 0.4732, "step": 2700 }, { "epoch": 8.36, "learning_rate": 3.8305263157894733e-07, "loss": 0.4791, "step": 2725 }, { "epoch": 8.44, "learning_rate": 3.817368421052631e-07, "loss": 0.4719, "step": 2750 }, { "epoch": 8.51, "learning_rate": 3.804210526315789e-07, "loss": 0.4719, "step": 2775 }, { "epoch": 8.59, "learning_rate": 3.791052631578947e-07, "loss": 0.4805, "step": 2800 }, { "epoch": 8.67, "learning_rate": 3.777894736842105e-07, "loss": 0.4655, "step": 2825 }, { "epoch": 8.74, "learning_rate": 3.764736842105263e-07, "loss": 0.459, "step": 2850 }, { "epoch": 8.82, "learning_rate": 3.751578947368421e-07, "loss": 0.4437, "step": 2875 }, { "epoch": 8.9, "learning_rate": 3.738421052631579e-07, "loss": 0.4595, "step": 2900 }, { "epoch": 8.97, "learning_rate": 3.725263157894737e-07, "loss": 0.462, "step": 2925 }, { "epoch": 9.05, "learning_rate": 3.7121052631578945e-07, "loss": 0.4692, "step": 2950 }, { "epoch": 9.13, "learning_rate": 3.6989473684210524e-07, "loss": 0.463, "step": 2975 }, { "epoch": 9.2, "learning_rate": 3.6857894736842103e-07, "loss": 0.4483, "step": 3000 }, { "epoch": 9.2, "eval_loss": 0.6372915506362915, "eval_runtime": 968.9455, "eval_samples_per_second": 10.801, "eval_steps_per_second": 0.339, "eval_wer": 87.12340241562863, "step": 3000 }, { "epoch": 9.28, "learning_rate": 3.672631578947368e-07, "loss": 0.469, "step": 3025 }, { "epoch": 9.36, "learning_rate": 3.659473684210526e-07, "loss": 0.4572, "step": 3050 }, { "epoch": 9.43, "learning_rate": 3.646315789473684e-07, "loss": 0.4458, "step": 3075 }, { "epoch": 9.51, "learning_rate": 3.633157894736842e-07, "loss": 0.4532, "step": 3100 }, { "epoch": 9.59, "learning_rate": 3.62e-07, "loss": 0.4506, "step": 3125 }, { "epoch": 9.66, "learning_rate": 3.606842105263158e-07, "loss": 0.4504, "step": 3150 }, { "epoch": 9.74, "learning_rate": 3.5936842105263157e-07, "loss": 0.4412, "step": 3175 }, { "epoch": 9.82, "learning_rate": 3.580526315789473e-07, "loss": 0.4631, "step": 3200 }, { "epoch": 9.89, "learning_rate": 3.5673684210526315e-07, "loss": 0.4392, "step": 3225 }, { "epoch": 9.97, "learning_rate": 3.5542105263157894e-07, "loss": 0.4409, "step": 3250 }, { "epoch": 10.05, "learning_rate": 3.5410526315789473e-07, "loss": 0.4404, "step": 3275 }, { "epoch": 10.12, "learning_rate": 3.527894736842105e-07, "loss": 0.439, "step": 3300 }, { "epoch": 10.2, "learning_rate": 3.514736842105263e-07, "loss": 0.4375, "step": 3325 }, { "epoch": 10.28, "learning_rate": 3.501578947368421e-07, "loss": 0.4463, "step": 3350 }, { "epoch": 10.35, "learning_rate": 3.488421052631579e-07, "loss": 0.4294, "step": 3375 }, { "epoch": 10.43, "learning_rate": 3.475263157894737e-07, "loss": 0.4453, "step": 3400 }, { "epoch": 10.51, "learning_rate": 3.462105263157895e-07, "loss": 0.4483, "step": 3425 }, { "epoch": 10.58, "learning_rate": 3.448947368421052e-07, "loss": 0.4488, "step": 3450 }, { "epoch": 10.66, "learning_rate": 3.43578947368421e-07, "loss": 0.4323, "step": 3475 }, { "epoch": 10.74, "learning_rate": 3.422631578947368e-07, "loss": 0.4374, "step": 3500 }, { "epoch": 10.74, "eval_loss": 0.626091480255127, "eval_runtime": 975.9202, "eval_samples_per_second": 10.724, "eval_steps_per_second": 0.336, "eval_wer": 85.31438298792186, "step": 3500 }, { "epoch": 10.81, "learning_rate": 3.409473684210526e-07, "loss": 0.4479, "step": 3525 }, { "epoch": 10.89, "learning_rate": 3.396315789473684e-07, "loss": 0.4382, "step": 3550 }, { "epoch": 10.97, "learning_rate": 3.383157894736842e-07, "loss": 0.4409, "step": 3575 }, { "epoch": 11.04, "learning_rate": 3.37e-07, "loss": 0.4321, "step": 3600 }, { "epoch": 11.12, "learning_rate": 3.356842105263158e-07, "loss": 0.4455, "step": 3625 }, { "epoch": 11.2, "learning_rate": 3.343684210526316e-07, "loss": 0.4302, "step": 3650 }, { "epoch": 11.27, "learning_rate": 3.330526315789474e-07, "loss": 0.4218, "step": 3675 }, { "epoch": 11.35, "learning_rate": 3.317368421052631e-07, "loss": 0.4362, "step": 3700 }, { "epoch": 11.43, "learning_rate": 3.304210526315789e-07, "loss": 0.4189, "step": 3725 }, { "epoch": 11.5, "learning_rate": 3.291052631578947e-07, "loss": 0.443, "step": 3750 }, { "epoch": 11.58, "learning_rate": 3.277894736842105e-07, "loss": 0.4242, "step": 3775 }, { "epoch": 11.66, "learning_rate": 3.264736842105263e-07, "loss": 0.4333, "step": 3800 }, { "epoch": 11.73, "learning_rate": 3.251578947368421e-07, "loss": 0.4339, "step": 3825 }, { "epoch": 11.81, "learning_rate": 3.2384210526315787e-07, "loss": 0.4275, "step": 3850 }, { "epoch": 11.89, "learning_rate": 3.2252631578947366e-07, "loss": 0.4339, "step": 3875 }, { "epoch": 11.96, "learning_rate": 3.2121052631578945e-07, "loss": 0.4351, "step": 3900 }, { "epoch": 12.04, "learning_rate": 3.198947368421053e-07, "loss": 0.4246, "step": 3925 }, { "epoch": 12.12, "learning_rate": 3.1857894736842103e-07, "loss": 0.4213, "step": 3950 }, { "epoch": 12.19, "learning_rate": 3.172631578947368e-07, "loss": 0.4255, "step": 3975 }, { "epoch": 12.27, "learning_rate": 3.159473684210526e-07, "loss": 0.4331, "step": 4000 }, { "epoch": 12.27, "eval_loss": 0.6179295778274536, "eval_runtime": 987.422, "eval_samples_per_second": 10.599, "eval_steps_per_second": 0.332, "eval_wer": 86.42897673538869, "step": 4000 }, { "epoch": 12.35, "learning_rate": 3.146315789473684e-07, "loss": 0.4194, "step": 4025 }, { "epoch": 12.42, "learning_rate": 3.133157894736842e-07, "loss": 0.4362, "step": 4050 }, { "epoch": 12.5, "learning_rate": 3.12e-07, "loss": 0.4288, "step": 4075 }, { "epoch": 12.58, "learning_rate": 3.106842105263158e-07, "loss": 0.4327, "step": 4100 }, { "epoch": 12.65, "learning_rate": 3.0936842105263157e-07, "loss": 0.4155, "step": 4125 }, { "epoch": 12.73, "learning_rate": 3.0805263157894736e-07, "loss": 0.4278, "step": 4150 }, { "epoch": 12.81, "learning_rate": 3.0673684210526315e-07, "loss": 0.4111, "step": 4175 }, { "epoch": 12.88, "learning_rate": 3.054210526315789e-07, "loss": 0.4332, "step": 4200 }, { "epoch": 12.96, "learning_rate": 3.041052631578947e-07, "loss": 0.4109, "step": 4225 }, { "epoch": 13.04, "learning_rate": 3.027894736842105e-07, "loss": 0.4247, "step": 4250 }, { "epoch": 13.11, "learning_rate": 3.014736842105263e-07, "loss": 0.4092, "step": 4275 }, { "epoch": 13.19, "learning_rate": 3.001578947368421e-07, "loss": 0.4278, "step": 4300 }, { "epoch": 13.27, "learning_rate": 2.988421052631579e-07, "loss": 0.4198, "step": 4325 }, { "epoch": 13.34, "learning_rate": 2.975263157894737e-07, "loss": 0.4165, "step": 4350 }, { "epoch": 13.42, "learning_rate": 2.962105263157895e-07, "loss": 0.4175, "step": 4375 }, { "epoch": 13.5, "learning_rate": 2.9489473684210527e-07, "loss": 0.4178, "step": 4400 }, { "epoch": 13.57, "learning_rate": 2.9357894736842106e-07, "loss": 0.4227, "step": 4425 }, { "epoch": 13.65, "learning_rate": 2.922631578947368e-07, "loss": 0.406, "step": 4450 }, { "epoch": 13.73, "learning_rate": 2.909473684210526e-07, "loss": 0.4042, "step": 4475 }, { "epoch": 13.8, "learning_rate": 2.896315789473684e-07, "loss": 0.4125, "step": 4500 }, { "epoch": 13.8, "eval_loss": 0.610640287399292, "eval_runtime": 949.6342, "eval_samples_per_second": 11.021, "eval_steps_per_second": 0.345, "eval_wer": 83.28649787889432, "step": 4500 }, { "epoch": 13.88, "learning_rate": 2.8831578947368417e-07, "loss": 0.4217, "step": 4525 }, { "epoch": 13.96, "learning_rate": 2.8699999999999996e-07, "loss": 0.4099, "step": 4550 }, { "epoch": 14.03, "learning_rate": 2.8568421052631575e-07, "loss": 0.4176, "step": 4575 }, { "epoch": 14.11, "learning_rate": 2.843684210526316e-07, "loss": 0.4158, "step": 4600 }, { "epoch": 14.19, "learning_rate": 2.830526315789474e-07, "loss": 0.4181, "step": 4625 }, { "epoch": 14.26, "learning_rate": 2.817368421052632e-07, "loss": 0.396, "step": 4650 }, { "epoch": 14.34, "learning_rate": 2.8042105263157897e-07, "loss": 0.4011, "step": 4675 }, { "epoch": 14.42, "learning_rate": 2.791052631578947e-07, "loss": 0.4069, "step": 4700 }, { "epoch": 14.49, "learning_rate": 2.777894736842105e-07, "loss": 0.4109, "step": 4725 }, { "epoch": 14.57, "learning_rate": 2.764736842105263e-07, "loss": 0.4098, "step": 4750 }, { "epoch": 14.65, "learning_rate": 2.751578947368421e-07, "loss": 0.4093, "step": 4775 }, { "epoch": 14.72, "learning_rate": 2.7384210526315787e-07, "loss": 0.4101, "step": 4800 }, { "epoch": 14.8, "learning_rate": 2.7252631578947366e-07, "loss": 0.3947, "step": 4825 }, { "epoch": 14.88, "learning_rate": 2.7121052631578945e-07, "loss": 0.4141, "step": 4850 }, { "epoch": 14.95, "learning_rate": 2.6989473684210524e-07, "loss": 0.414, "step": 4875 }, { "epoch": 15.03, "learning_rate": 2.6857894736842104e-07, "loss": 0.4074, "step": 4900 }, { "epoch": 15.11, "learning_rate": 2.672631578947369e-07, "loss": 0.4, "step": 4925 }, { "epoch": 15.18, "learning_rate": 2.659473684210526e-07, "loss": 0.4125, "step": 4950 }, { "epoch": 15.26, "learning_rate": 2.646315789473684e-07, "loss": 0.3934, "step": 4975 }, { "epoch": 15.34, "learning_rate": 2.633157894736842e-07, "loss": 0.3984, "step": 5000 }, { "epoch": 15.34, "eval_loss": 0.6058948040008545, "eval_runtime": 971.4806, "eval_samples_per_second": 10.773, "eval_steps_per_second": 0.338, "eval_wer": 83.06763219757356, "step": 5000 }, { "epoch": 15.41, "learning_rate": 2.62e-07, "loss": 0.4097, "step": 5025 }, { "epoch": 15.49, "learning_rate": 2.606842105263158e-07, "loss": 0.3892, "step": 5050 }, { "epoch": 15.57, "learning_rate": 2.5936842105263157e-07, "loss": 0.4194, "step": 5075 }, { "epoch": 15.64, "learning_rate": 2.5805263157894736e-07, "loss": 0.3991, "step": 5100 }, { "epoch": 15.72, "learning_rate": 2.5673684210526315e-07, "loss": 0.3985, "step": 5125 }, { "epoch": 15.8, "learning_rate": 2.5542105263157894e-07, "loss": 0.4032, "step": 5150 }, { "epoch": 15.87, "learning_rate": 2.5410526315789474e-07, "loss": 0.4087, "step": 5175 }, { "epoch": 15.95, "learning_rate": 2.527894736842105e-07, "loss": 0.3981, "step": 5200 }, { "epoch": 16.03, "learning_rate": 2.5147368421052626e-07, "loss": 0.4071, "step": 5225 }, { "epoch": 16.1, "learning_rate": 2.5015789473684206e-07, "loss": 0.3962, "step": 5250 }, { "epoch": 16.18, "learning_rate": 2.488421052631579e-07, "loss": 0.3838, "step": 5275 }, { "epoch": 16.26, "learning_rate": 2.475263157894737e-07, "loss": 0.3985, "step": 5300 }, { "epoch": 16.33, "learning_rate": 2.4621052631578943e-07, "loss": 0.3964, "step": 5325 }, { "epoch": 16.41, "learning_rate": 2.4489473684210527e-07, "loss": 0.4088, "step": 5350 }, { "epoch": 16.49, "learning_rate": 2.4357894736842106e-07, "loss": 0.3902, "step": 5375 }, { "epoch": 16.56, "learning_rate": 2.4226315789473685e-07, "loss": 0.4131, "step": 5400 }, { "epoch": 16.64, "learning_rate": 2.4094736842105265e-07, "loss": 0.4108, "step": 5425 }, { "epoch": 16.72, "learning_rate": 2.396315789473684e-07, "loss": 0.395, "step": 5450 }, { "epoch": 16.79, "learning_rate": 2.383157894736842e-07, "loss": 0.3945, "step": 5475 }, { "epoch": 16.87, "learning_rate": 2.3699999999999996e-07, "loss": 0.4035, "step": 5500 }, { "epoch": 16.87, "eval_loss": 0.6007612347602844, "eval_runtime": 964.162, "eval_samples_per_second": 10.855, "eval_steps_per_second": 0.34, "eval_wer": 82.21648788132616, "step": 5500 }, { "epoch": 16.95, "learning_rate": 2.3568421052631578e-07, "loss": 0.3914, "step": 5525 }, { "epoch": 17.02, "learning_rate": 2.3436842105263157e-07, "loss": 0.3901, "step": 5550 }, { "epoch": 17.1, "learning_rate": 2.3305263157894736e-07, "loss": 0.3923, "step": 5575 }, { "epoch": 17.18, "learning_rate": 2.3173684210526316e-07, "loss": 0.397, "step": 5600 }, { "epoch": 17.25, "learning_rate": 2.3042105263157892e-07, "loss": 0.392, "step": 5625 }, { "epoch": 17.33, "learning_rate": 2.291052631578947e-07, "loss": 0.3992, "step": 5650 }, { "epoch": 17.41, "learning_rate": 2.277894736842105e-07, "loss": 0.3902, "step": 5675 }, { "epoch": 17.48, "learning_rate": 2.2647368421052632e-07, "loss": 0.4042, "step": 5700 }, { "epoch": 17.56, "learning_rate": 2.251578947368421e-07, "loss": 0.3865, "step": 5725 }, { "epoch": 17.64, "learning_rate": 2.238421052631579e-07, "loss": 0.3879, "step": 5750 }, { "epoch": 17.71, "learning_rate": 2.2252631578947367e-07, "loss": 0.3935, "step": 5775 }, { "epoch": 17.79, "learning_rate": 2.2121052631578946e-07, "loss": 0.3882, "step": 5800 }, { "epoch": 17.87, "learning_rate": 2.1989473684210525e-07, "loss": 0.3892, "step": 5825 }, { "epoch": 17.94, "learning_rate": 2.1857894736842104e-07, "loss": 0.4042, "step": 5850 }, { "epoch": 18.02, "learning_rate": 2.1726315789473686e-07, "loss": 0.3914, "step": 5875 }, { "epoch": 18.1, "learning_rate": 2.1594736842105262e-07, "loss": 0.3809, "step": 5900 }, { "epoch": 18.17, "learning_rate": 2.146315789473684e-07, "loss": 0.3822, "step": 5925 }, { "epoch": 18.25, "learning_rate": 2.133157894736842e-07, "loss": 0.3781, "step": 5950 }, { "epoch": 18.33, "learning_rate": 2.12e-07, "loss": 0.3925, "step": 5975 }, { "epoch": 18.4, "learning_rate": 2.1068421052631576e-07, "loss": 0.3997, "step": 6000 }, { "epoch": 18.4, "eval_loss": 0.5970112085342407, "eval_runtime": 953.0961, "eval_samples_per_second": 10.981, "eval_steps_per_second": 0.344, "eval_wer": 81.11945742927395, "step": 6000 }, { "epoch": 18.48, "learning_rate": 2.0936842105263157e-07, "loss": 0.3887, "step": 6025 }, { "epoch": 18.56, "learning_rate": 2.0805263157894737e-07, "loss": 0.389, "step": 6050 }, { "epoch": 18.63, "learning_rate": 2.0673684210526316e-07, "loss": 0.3827, "step": 6075 }, { "epoch": 18.71, "learning_rate": 2.0542105263157895e-07, "loss": 0.3913, "step": 6100 }, { "epoch": 18.79, "learning_rate": 2.041052631578947e-07, "loss": 0.3842, "step": 6125 }, { "epoch": 18.87, "learning_rate": 2.027894736842105e-07, "loss": 0.3855, "step": 6150 }, { "epoch": 18.94, "learning_rate": 2.0152631578947368e-07, "loss": 0.3961, "step": 6175 }, { "epoch": 19.02, "learning_rate": 2.0021052631578947e-07, "loss": 0.3843, "step": 6200 }, { "epoch": 19.1, "learning_rate": 1.9889473684210526e-07, "loss": 0.3861, "step": 6225 }, { "epoch": 19.17, "learning_rate": 1.9757894736842105e-07, "loss": 0.3877, "step": 6250 }, { "epoch": 19.25, "learning_rate": 1.962631578947368e-07, "loss": 0.3944, "step": 6275 }, { "epoch": 19.33, "learning_rate": 1.949473684210526e-07, "loss": 0.3927, "step": 6300 }, { "epoch": 19.4, "learning_rate": 1.9363157894736842e-07, "loss": 0.3829, "step": 6325 }, { "epoch": 19.48, "learning_rate": 1.923157894736842e-07, "loss": 0.3814, "step": 6350 }, { "epoch": 19.56, "learning_rate": 1.91e-07, "loss": 0.3812, "step": 6375 }, { "epoch": 19.63, "learning_rate": 1.8968421052631577e-07, "loss": 0.3874, "step": 6400 }, { "epoch": 19.71, "learning_rate": 1.8836842105263156e-07, "loss": 0.3792, "step": 6425 }, { "epoch": 19.79, "learning_rate": 1.8705263157894735e-07, "loss": 0.379, "step": 6450 }, { "epoch": 19.86, "learning_rate": 1.8573684210526314e-07, "loss": 0.3804, "step": 6475 }, { "epoch": 19.94, "learning_rate": 1.8442105263157896e-07, "loss": 0.3878, "step": 6500 }, { "epoch": 19.94, "eval_loss": 0.5940667986869812, "eval_runtime": 960.8217, "eval_samples_per_second": 10.893, "eval_steps_per_second": 0.341, "eval_wer": 81.71525845064714, "step": 6500 }, { "epoch": 20.02, "learning_rate": 1.8310526315789472e-07, "loss": 0.3745, "step": 6525 }, { "epoch": 20.09, "learning_rate": 1.817894736842105e-07, "loss": 0.4015, "step": 6550 }, { "epoch": 20.17, "learning_rate": 1.804736842105263e-07, "loss": 0.3813, "step": 6575 }, { "epoch": 20.25, "learning_rate": 1.791578947368421e-07, "loss": 0.376, "step": 6600 }, { "epoch": 20.32, "learning_rate": 1.7784210526315789e-07, "loss": 0.3937, "step": 6625 }, { "epoch": 20.4, "learning_rate": 1.7652631578947368e-07, "loss": 0.3875, "step": 6650 }, { "epoch": 20.48, "learning_rate": 1.7521052631578947e-07, "loss": 0.3912, "step": 6675 }, { "epoch": 20.55, "learning_rate": 1.7389473684210526e-07, "loss": 0.3649, "step": 6700 }, { "epoch": 20.63, "learning_rate": 1.7257894736842105e-07, "loss": 0.3753, "step": 6725 }, { "epoch": 20.71, "learning_rate": 1.7126315789473684e-07, "loss": 0.3711, "step": 6750 }, { "epoch": 20.78, "learning_rate": 1.699473684210526e-07, "loss": 0.382, "step": 6775 }, { "epoch": 20.86, "learning_rate": 1.686315789473684e-07, "loss": 0.3727, "step": 6800 }, { "epoch": 20.94, "learning_rate": 1.673157894736842e-07, "loss": 0.3706, "step": 6825 }, { "epoch": 21.01, "learning_rate": 1.66e-07, "loss": 0.3888, "step": 6850 }, { "epoch": 21.09, "learning_rate": 1.646842105263158e-07, "loss": 0.3676, "step": 6875 }, { "epoch": 21.17, "learning_rate": 1.6336842105263156e-07, "loss": 0.3791, "step": 6900 }, { "epoch": 21.24, "learning_rate": 1.6205263157894735e-07, "loss": 0.3793, "step": 6925 }, { "epoch": 21.32, "learning_rate": 1.6073684210526314e-07, "loss": 0.3671, "step": 6950 }, { "epoch": 21.4, "learning_rate": 1.5942105263157893e-07, "loss": 0.3875, "step": 6975 }, { "epoch": 21.47, "learning_rate": 1.5810526315789475e-07, "loss": 0.3827, "step": 7000 }, { "epoch": 21.47, "eval_loss": 0.5906410217285156, "eval_runtime": 960.7164, "eval_samples_per_second": 10.894, "eval_steps_per_second": 0.341, "eval_wer": 81.25591072441838, "step": 7000 }, { "epoch": 21.55, "learning_rate": 1.5678947368421051e-07, "loss": 0.3674, "step": 7025 }, { "epoch": 21.63, "learning_rate": 1.554736842105263e-07, "loss": 0.3851, "step": 7050 }, { "epoch": 21.7, "learning_rate": 1.541578947368421e-07, "loss": 0.3878, "step": 7075 }, { "epoch": 21.78, "learning_rate": 1.528421052631579e-07, "loss": 0.3724, "step": 7100 }, { "epoch": 21.86, "learning_rate": 1.5152631578947368e-07, "loss": 0.363, "step": 7125 }, { "epoch": 21.93, "learning_rate": 1.5021052631578944e-07, "loss": 0.3789, "step": 7150 }, { "epoch": 22.01, "learning_rate": 1.4889473684210526e-07, "loss": 0.3889, "step": 7175 }, { "epoch": 22.09, "learning_rate": 1.4757894736842105e-07, "loss": 0.3689, "step": 7200 }, { "epoch": 22.16, "learning_rate": 1.4626315789473684e-07, "loss": 0.382, "step": 7225 }, { "epoch": 22.24, "learning_rate": 1.4494736842105263e-07, "loss": 0.3682, "step": 7250 }, { "epoch": 22.32, "learning_rate": 1.436315789473684e-07, "loss": 0.3727, "step": 7275 }, { "epoch": 22.39, "learning_rate": 1.423157894736842e-07, "loss": 0.3831, "step": 7300 }, { "epoch": 22.47, "learning_rate": 1.4099999999999998e-07, "loss": 0.367, "step": 7325 }, { "epoch": 22.55, "learning_rate": 1.396842105263158e-07, "loss": 0.3853, "step": 7350 }, { "epoch": 22.62, "learning_rate": 1.383684210526316e-07, "loss": 0.3572, "step": 7375 }, { "epoch": 22.7, "learning_rate": 1.3705263157894735e-07, "loss": 0.3779, "step": 7400 }, { "epoch": 22.78, "learning_rate": 1.3573684210526314e-07, "loss": 0.3811, "step": 7425 }, { "epoch": 22.85, "learning_rate": 1.3442105263157893e-07, "loss": 0.3765, "step": 7450 }, { "epoch": 22.93, "learning_rate": 1.3310526315789472e-07, "loss": 0.3663, "step": 7475 }, { "epoch": 23.01, "learning_rate": 1.3178947368421054e-07, "loss": 0.3785, "step": 7500 }, { "epoch": 23.01, "eval_loss": 0.5891919732093811, "eval_runtime": 940.1062, "eval_samples_per_second": 11.133, "eval_steps_per_second": 0.349, "eval_wer": 81.05055527033966, "step": 7500 }, { "epoch": 23.08, "learning_rate": 1.304736842105263e-07, "loss": 0.3617, "step": 7525 }, { "epoch": 23.16, "learning_rate": 1.291578947368421e-07, "loss": 0.3755, "step": 7550 }, { "epoch": 23.24, "learning_rate": 1.278421052631579e-07, "loss": 0.3732, "step": 7575 }, { "epoch": 23.31, "learning_rate": 1.2652631578947368e-07, "loss": 0.3634, "step": 7600 }, { "epoch": 23.39, "learning_rate": 1.2521052631578947e-07, "loss": 0.3765, "step": 7625 }, { "epoch": 23.47, "learning_rate": 1.2389473684210526e-07, "loss": 0.3699, "step": 7650 }, { "epoch": 23.54, "learning_rate": 1.2257894736842105e-07, "loss": 0.3794, "step": 7675 }, { "epoch": 23.62, "learning_rate": 1.2126315789473684e-07, "loss": 0.3754, "step": 7700 }, { "epoch": 23.7, "learning_rate": 1.1994736842105263e-07, "loss": 0.3637, "step": 7725 }, { "epoch": 23.77, "learning_rate": 1.1863157894736841e-07, "loss": 0.37, "step": 7750 }, { "epoch": 23.85, "learning_rate": 1.173157894736842e-07, "loss": 0.3711, "step": 7775 }, { "epoch": 23.93, "learning_rate": 1.16e-07, "loss": 0.3832, "step": 7800 }, { "epoch": 24.0, "learning_rate": 1.1468421052631578e-07, "loss": 0.3668, "step": 7825 }, { "epoch": 24.08, "learning_rate": 1.1336842105263158e-07, "loss": 0.3636, "step": 7850 }, { "epoch": 24.16, "learning_rate": 1.1205263157894735e-07, "loss": 0.3587, "step": 7875 }, { "epoch": 24.23, "learning_rate": 1.1073684210526316e-07, "loss": 0.3716, "step": 7900 }, { "epoch": 24.31, "learning_rate": 1.0942105263157895e-07, "loss": 0.3652, "step": 7925 }, { "epoch": 24.39, "learning_rate": 1.0810526315789473e-07, "loss": 0.3662, "step": 7950 }, { "epoch": 24.46, "learning_rate": 1.0678947368421053e-07, "loss": 0.3646, "step": 7975 }, { "epoch": 24.54, "learning_rate": 1.0547368421052631e-07, "loss": 0.372, "step": 8000 }, { "epoch": 24.54, "eval_loss": 0.5882065296173096, "eval_runtime": 953.6514, "eval_samples_per_second": 10.975, "eval_steps_per_second": 0.344, "eval_wer": 81.42478856494367, "step": 8000 }, { "epoch": 24.62, "learning_rate": 1.041578947368421e-07, "loss": 0.3793, "step": 8025 }, { "epoch": 24.69, "learning_rate": 1.028421052631579e-07, "loss": 0.3814, "step": 8050 }, { "epoch": 24.77, "learning_rate": 1.0152631578947368e-07, "loss": 0.3741, "step": 8075 }, { "epoch": 24.85, "learning_rate": 1.0021052631578947e-07, "loss": 0.371, "step": 8100 }, { "epoch": 24.92, "learning_rate": 9.889473684210525e-08, "loss": 0.3745, "step": 8125 }, { "epoch": 25.0, "learning_rate": 9.757894736842105e-08, "loss": 0.3667, "step": 8150 }, { "epoch": 25.08, "learning_rate": 9.626315789473684e-08, "loss": 0.3656, "step": 8175 }, { "epoch": 25.15, "learning_rate": 9.494736842105262e-08, "loss": 0.3628, "step": 8200 }, { "epoch": 25.23, "learning_rate": 9.363157894736843e-08, "loss": 0.3729, "step": 8225 }, { "epoch": 25.31, "learning_rate": 9.23157894736842e-08, "loss": 0.3739, "step": 8250 }, { "epoch": 25.38, "learning_rate": 9.1e-08, "loss": 0.364, "step": 8275 }, { "epoch": 25.46, "learning_rate": 8.968421052631579e-08, "loss": 0.3618, "step": 8300 }, { "epoch": 25.54, "learning_rate": 8.836842105263158e-08, "loss": 0.3719, "step": 8325 }, { "epoch": 25.61, "learning_rate": 8.705263157894737e-08, "loss": 0.3684, "step": 8350 }, { "epoch": 25.69, "learning_rate": 8.573684210526314e-08, "loss": 0.3676, "step": 8375 }, { "epoch": 25.77, "learning_rate": 8.442105263157895e-08, "loss": 0.3728, "step": 8400 }, { "epoch": 25.84, "learning_rate": 8.310526315789474e-08, "loss": 0.3789, "step": 8425 }, { "epoch": 25.92, "learning_rate": 8.178947368421052e-08, "loss": 0.3666, "step": 8450 }, { "epoch": 26.0, "learning_rate": 8.047368421052632e-08, "loss": 0.3661, "step": 8475 }, { "epoch": 26.07, "learning_rate": 7.91578947368421e-08, "loss": 0.3655, "step": 8500 }, { "epoch": 26.07, "eval_loss": 0.5864801406860352, "eval_runtime": 968.3356, "eval_samples_per_second": 10.808, "eval_steps_per_second": 0.339, "eval_wer": 81.04785322489124, "step": 8500 }, { "epoch": 26.15, "learning_rate": 7.784210526315789e-08, "loss": 0.3658, "step": 8525 }, { "epoch": 26.23, "learning_rate": 7.652631578947368e-08, "loss": 0.37, "step": 8550 }, { "epoch": 26.3, "learning_rate": 7.521052631578947e-08, "loss": 0.3783, "step": 8575 }, { "epoch": 26.38, "learning_rate": 7.389473684210526e-08, "loss": 0.3623, "step": 8600 }, { "epoch": 26.46, "learning_rate": 7.257894736842104e-08, "loss": 0.3572, "step": 8625 }, { "epoch": 26.53, "learning_rate": 7.13157894736842e-08, "loss": 0.3655, "step": 8650 }, { "epoch": 26.61, "learning_rate": 7e-08, "loss": 0.364, "step": 8675 }, { "epoch": 26.69, "learning_rate": 6.868421052631578e-08, "loss": 0.3736, "step": 8700 }, { "epoch": 26.76, "learning_rate": 6.736842105263157e-08, "loss": 0.3624, "step": 8725 }, { "epoch": 26.84, "learning_rate": 6.605263157894736e-08, "loss": 0.3618, "step": 8750 }, { "epoch": 26.92, "learning_rate": 6.473684210526316e-08, "loss": 0.3719, "step": 8775 }, { "epoch": 26.99, "learning_rate": 6.342105263157895e-08, "loss": 0.3625, "step": 8800 }, { "epoch": 27.07, "learning_rate": 6.210526315789474e-08, "loss": 0.3668, "step": 8825 }, { "epoch": 27.15, "learning_rate": 6.078947368421053e-08, "loss": 0.3632, "step": 8850 }, { "epoch": 27.22, "learning_rate": 5.947368421052631e-08, "loss": 0.3508, "step": 8875 }, { "epoch": 27.3, "learning_rate": 5.81578947368421e-08, "loss": 0.3612, "step": 8900 }, { "epoch": 27.38, "learning_rate": 5.6842105263157894e-08, "loss": 0.3604, "step": 8925 }, { "epoch": 27.45, "learning_rate": 5.5526315789473685e-08, "loss": 0.375, "step": 8950 }, { "epoch": 27.53, "learning_rate": 5.421052631578947e-08, "loss": 0.364, "step": 8975 }, { "epoch": 27.61, "learning_rate": 5.289473684210526e-08, "loss": 0.3697, "step": 9000 }, { "epoch": 27.61, "eval_loss": 0.5856062173843384, "eval_runtime": 963.5096, "eval_samples_per_second": 10.862, "eval_steps_per_second": 0.34, "eval_wer": 80.47772163527792, "step": 9000 }, { "epoch": 27.68, "learning_rate": 5.157894736842105e-08, "loss": 0.3692, "step": 9025 }, { "epoch": 27.76, "learning_rate": 5.026315789473684e-08, "loss": 0.3826, "step": 9050 }, { "epoch": 27.84, "learning_rate": 4.8947368421052627e-08, "loss": 0.3615, "step": 9075 }, { "epoch": 27.91, "learning_rate": 4.763157894736842e-08, "loss": 0.3779, "step": 9100 }, { "epoch": 27.99, "learning_rate": 4.631578947368421e-08, "loss": 0.3581, "step": 9125 }, { "epoch": 28.07, "learning_rate": 4.5e-08, "loss": 0.37, "step": 9150 }, { "epoch": 28.14, "learning_rate": 4.368421052631579e-08, "loss": 0.3787, "step": 9175 }, { "epoch": 28.22, "learning_rate": 4.2368421052631575e-08, "loss": 0.3578, "step": 9200 }, { "epoch": 28.3, "learning_rate": 4.1052631578947365e-08, "loss": 0.3708, "step": 9225 }, { "epoch": 28.37, "learning_rate": 3.9736842105263156e-08, "loss": 0.3706, "step": 9250 }, { "epoch": 28.45, "learning_rate": 3.842105263157895e-08, "loss": 0.3737, "step": 9275 }, { "epoch": 28.53, "learning_rate": 3.710526315789474e-08, "loss": 0.3629, "step": 9300 }, { "epoch": 28.6, "learning_rate": 3.578947368421052e-08, "loss": 0.3522, "step": 9325 }, { "epoch": 28.68, "learning_rate": 3.4473684210526313e-08, "loss": 0.3641, "step": 9350 }, { "epoch": 28.76, "learning_rate": 3.3157894736842104e-08, "loss": 0.366, "step": 9375 }, { "epoch": 28.83, "learning_rate": 3.1842105263157895e-08, "loss": 0.3646, "step": 9400 }, { "epoch": 28.91, "learning_rate": 3.052631578947368e-08, "loss": 0.3718, "step": 9425 }, { "epoch": 28.99, "learning_rate": 2.9210526315789474e-08, "loss": 0.3461, "step": 9450 }, { "epoch": 29.06, "learning_rate": 2.789473684210526e-08, "loss": 0.3729, "step": 9475 }, { "epoch": 29.14, "learning_rate": 2.6578947368421052e-08, "loss": 0.3658, "step": 9500 }, { "epoch": 29.14, "eval_loss": 0.5849200487136841, "eval_runtime": 977.9456, "eval_samples_per_second": 10.702, "eval_steps_per_second": 0.335, "eval_wer": 80.61282390769813, "step": 9500 }, { "epoch": 29.22, "learning_rate": 2.526315789473684e-08, "loss": 0.3671, "step": 9525 }, { "epoch": 29.29, "learning_rate": 2.394736842105263e-08, "loss": 0.3582, "step": 9550 }, { "epoch": 29.37, "learning_rate": 2.2631578947368422e-08, "loss": 0.354, "step": 9575 }, { "epoch": 29.45, "learning_rate": 2.131578947368421e-08, "loss": 0.3532, "step": 9600 }, { "epoch": 29.52, "learning_rate": 2e-08, "loss": 0.3523, "step": 9625 }, { "epoch": 29.6, "learning_rate": 1.8684210526315788e-08, "loss": 0.3627, "step": 9650 }, { "epoch": 29.68, "learning_rate": 1.736842105263158e-08, "loss": 0.3725, "step": 9675 }, { "epoch": 29.75, "learning_rate": 1.6052631578947367e-08, "loss": 0.3643, "step": 9700 }, { "epoch": 29.83, "learning_rate": 1.4736842105263158e-08, "loss": 0.3669, "step": 9725 }, { "epoch": 29.91, "learning_rate": 1.3421052631578947e-08, "loss": 0.3732, "step": 9750 }, { "epoch": 29.98, "learning_rate": 1.2105263157894736e-08, "loss": 0.3679, "step": 9775 }, { "epoch": 30.06, "learning_rate": 1.0789473684210525e-08, "loss": 0.3789, "step": 9800 }, { "epoch": 30.14, "learning_rate": 9.473684210526316e-09, "loss": 0.375, "step": 9825 }, { "epoch": 30.21, "learning_rate": 8.157894736842106e-09, "loss": 0.3638, "step": 9850 }, { "epoch": 30.29, "learning_rate": 6.842105263157895e-09, "loss": 0.3734, "step": 9875 }, { "epoch": 30.37, "learning_rate": 5.526315789473683e-09, "loss": 0.3662, "step": 9900 }, { "epoch": 30.44, "learning_rate": 4.210526315789473e-09, "loss": 0.3435, "step": 9925 }, { "epoch": 30.52, "learning_rate": 2.894736842105263e-09, "loss": 0.3724, "step": 9950 }, { "epoch": 30.6, "learning_rate": 1.5789473684210525e-09, "loss": 0.3587, "step": 9975 }, { "epoch": 30.67, "learning_rate": 2.631578947368421e-10, "loss": 0.3539, "step": 10000 }, { "epoch": 30.67, "eval_loss": 0.5847803354263306, "eval_runtime": 974.3996, "eval_samples_per_second": 10.741, "eval_steps_per_second": 0.337, "eval_wer": 80.66956686211462, "step": 10000 }, { "epoch": 30.67, "step": 10000, "total_flos": 4.14675899449344e+19, "train_loss": 0.462699613571167, "train_runtime": 47184.4648, "train_samples_per_second": 13.564, "train_steps_per_second": 0.212 } ], "logging_steps": 25, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 31, "save_steps": 500, "total_flos": 4.14675899449344e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }