diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,12925 +1,175 @@ { - "best_metric": 24.878197320341048, - "best_model_checkpoint": "./checkpoint-98000", - "epoch": 4.00848, - "global_step": 100000, + "best_metric": 51.37028014616322, + "best_model_checkpoint": "./checkpoint-2000", + "epoch": 1.0, + "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0, - "learning_rate": 1.5000000000000002e-07, - "loss": 2.936, - "step": 50 - }, - { - "epoch": 0.0, - "learning_rate": 3.0000000000000004e-07, - "loss": 2.8713, - "step": 100 - }, - { - "epoch": 0.0, - "learning_rate": 4.5e-07, - "loss": 2.7643, - "step": 150 - }, - { - "epoch": 0.0, - "learning_rate": 6.000000000000001e-07, - "loss": 2.6478, + "epoch": 0.1, + "learning_rate": 2.9850000000000002e-06, + "loss": 2.4574, "step": 200 }, { - "epoch": 0.0, - "learning_rate": 7.5e-07, - "loss": 2.5266, - "step": 250 - }, - { - "epoch": 0.0, - "learning_rate": 9e-07, - "loss": 2.4391, - "step": 300 - }, - { - "epoch": 0.0, - "learning_rate": 1.05e-06, - "loss": 2.3618, - "step": 350 + "epoch": 0.1, + "eval_loss": 1.4662598371505737, + "eval_runtime": 21.5397, + "eval_samples_per_second": 7.475, + "eval_steps_per_second": 0.279, + "eval_wer": 71.65042630937882, + "step": 200 }, { - "epoch": 0.0, - "learning_rate": 1.2000000000000002e-06, - "loss": 2.2763, + "epoch": 0.2, + "learning_rate": 3e-06, + "loss": 1.9587, "step": 400 }, { - "epoch": 0.0, - "learning_rate": 1.35e-06, - "loss": 2.2461, - "step": 450 - }, - { - "epoch": 0.01, - "learning_rate": 1.5e-06, - "loss": 2.1815, - "step": 500 - }, - { - "epoch": 0.01, - "learning_rate": 1.65e-06, - "loss": 2.1294, - "step": 550 + "epoch": 0.2, + "eval_loss": 1.2580980062484741, + "eval_runtime": 16.6116, + "eval_samples_per_second": 9.692, + "eval_steps_per_second": 0.361, + "eval_wer": 64.73812423873325, + "step": 400 }, { - "epoch": 0.01, - "learning_rate": 1.8e-06, - "loss": 2.0958, + "epoch": 0.3, + "learning_rate": 3e-06, + "loss": 1.816, "step": 600 }, { - "epoch": 0.01, - "learning_rate": 1.95e-06, - "loss": 2.0656, - "step": 650 - }, - { - "epoch": 0.01, - "learning_rate": 2.1e-06, - "loss": 2.0392, - "step": 700 - }, - { - "epoch": 0.01, - "learning_rate": 2.25e-06, - "loss": 2.0037, - "step": 750 + "epoch": 0.3, + "eval_loss": 1.1671593189239502, + "eval_runtime": 17.1526, + "eval_samples_per_second": 9.386, + "eval_steps_per_second": 0.35, + "eval_wer": 60.93179049939099, + "step": 600 }, { - "epoch": 0.01, - "learning_rate": 2.4000000000000003e-06, - "loss": 1.9791, + "epoch": 0.4, + "learning_rate": 3e-06, + "loss": 1.7199, "step": 800 }, { - "epoch": 0.01, - "learning_rate": 2.55e-06, - "loss": 1.9636, - "step": 850 - }, - { - "epoch": 0.01, - "learning_rate": 2.7e-06, - "loss": 1.9405, - "step": 900 - }, - { - "epoch": 0.01, - "learning_rate": 2.85e-06, - "loss": 1.9216, - "step": 950 + "epoch": 0.4, + "eval_loss": 1.100619912147522, + "eval_runtime": 15.3573, + "eval_samples_per_second": 10.484, + "eval_steps_per_second": 0.391, + "eval_wer": 57.673568818514006, + "step": 800 }, { - "epoch": 0.01, + "epoch": 0.5, "learning_rate": 3e-06, - "loss": 1.8819, + "loss": 1.6686, "step": 1000 }, { - "epoch": 0.01, - "eval_loss": 1.1868911981582642, - "eval_runtime": 54.8495, - "eval_samples_per_second": 2.935, - "eval_steps_per_second": 0.055, - "eval_wer": 61.96711327649208, + "epoch": 0.5, + "eval_loss": 1.0630302429199219, + "eval_runtime": 16.3877, + "eval_samples_per_second": 9.824, + "eval_steps_per_second": 0.366, + "eval_wer": 56.18148599269184, "step": 1000 }, { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.8776, - "step": 1050 - }, - { - "epoch": 0.01, + "epoch": 0.6, "learning_rate": 3e-06, - "loss": 1.8508, - "step": 1100 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.8361, - "step": 1150 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.8191, + "loss": 1.621, "step": 1200 }, { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.7992, - "step": 1250 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.7928, - "step": 1300 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.7808, - "step": 1350 + "epoch": 0.6, + "eval_loss": 1.0273046493530273, + "eval_runtime": 16.6903, + "eval_samples_per_second": 9.646, + "eval_steps_per_second": 0.359, + "eval_wer": 55.481120584652864, + "step": 1200 }, { - "epoch": 0.01, + "epoch": 0.7, "learning_rate": 3e-06, - "loss": 1.7558, + "loss": 1.5846, "step": 1400 }, { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.7655, - "step": 1450 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.7327, - "step": 1500 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.7395, - "step": 1550 + "epoch": 0.7, + "eval_loss": 1.0016604661941528, + "eval_runtime": 16.3415, + "eval_samples_per_second": 9.852, + "eval_steps_per_second": 0.367, + "eval_wer": 53.98903775883069, + "step": 1400 }, { - "epoch": 0.02, + "epoch": 0.8, "learning_rate": 3e-06, - "loss": 1.7301, + "loss": 1.5482, "step": 1600 }, { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.7016, - "step": 1650 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6962, - "step": 1700 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6992, - "step": 1750 + "epoch": 0.8, + "eval_loss": 0.9773023128509521, + "eval_runtime": 16.9737, + "eval_samples_per_second": 9.485, + "eval_steps_per_second": 0.353, + "eval_wer": 53.014616321559075, + "step": 1600 }, { - "epoch": 0.02, + "epoch": 0.9, "learning_rate": 3e-06, - "loss": 1.6888, + "loss": 1.521, "step": 1800 }, { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6646, - "step": 1850 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6713, - "step": 1900 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6562, - "step": 1950 + "epoch": 0.9, + "eval_loss": 0.9574936628341675, + "eval_runtime": 18.1323, + "eval_samples_per_second": 8.879, + "eval_steps_per_second": 0.331, + "eval_wer": 52.10109622411693, + "step": 1800 }, { - "epoch": 0.02, + "epoch": 1.0, "learning_rate": 3e-06, - "loss": 1.6425, + "loss": 1.4932, "step": 2000 }, { - "epoch": 0.02, - "eval_loss": 0.9990558624267578, - "eval_runtime": 38.5229, - "eval_samples_per_second": 4.179, - "eval_steps_per_second": 0.078, - "eval_wer": 53.65408038976858, + "epoch": 1.0, + "eval_loss": 0.9374914765357971, + "eval_runtime": 15.2056, + "eval_samples_per_second": 10.588, + "eval_steps_per_second": 0.395, + "eval_wer": 51.37028014616322, "step": 2000 }, { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6376, - "step": 2050 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6457, - "step": 2100 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6244, - "step": 2150 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6113, - "step": 2200 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6096, - "step": 2250 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6302, - "step": 2300 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6078, - "step": 2350 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.6064, - "step": 2400 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.5884, - "step": 2450 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5819, - "step": 2500 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5784, - "step": 2550 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5648, - "step": 2600 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5758, - "step": 2650 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5627, - "step": 2700 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5573, - "step": 2750 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5594, - "step": 2800 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.55, - "step": 2850 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5583, - "step": 2900 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5416, - "step": 2950 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.548, - "step": 3000 - }, - { - "epoch": 0.03, - "eval_loss": 0.9147223234176636, - "eval_runtime": 37.9927, - "eval_samples_per_second": 4.238, - "eval_steps_per_second": 0.079, - "eval_wer": 50.21315468940317, - "step": 3000 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5347, - "step": 3050 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5435, - "step": 3100 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5246, - "step": 3150 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5326, - "step": 3200 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5094, - "step": 3250 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5061, - "step": 3300 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5139, - "step": 3350 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5055, - "step": 3400 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.5109, - "step": 3450 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4999, - "step": 3500 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.5015, - "step": 3550 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4853, - "step": 3600 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4836, - "step": 3650 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4874, - "step": 3700 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4758, - "step": 3750 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.462, - "step": 3800 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4813, - "step": 3850 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4836, - "step": 3900 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4692, - "step": 3950 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4636, - "step": 4000 - }, - { - "epoch": 0.04, - "eval_loss": 0.8605496287345886, - "eval_runtime": 40.5349, - "eval_samples_per_second": 3.972, - "eval_steps_per_second": 0.074, - "eval_wer": 47.07673568818514, - "step": 4000 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4715, - "step": 4050 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.477, - "step": 4100 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4539, - "step": 4150 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4555, - "step": 4200 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4423, - "step": 4250 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4506, - "step": 4300 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4404, - "step": 4350 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4455, - "step": 4400 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4463, - "step": 4450 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.4324, - "step": 4500 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.4317, - "step": 4550 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.4169, - "step": 4600 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.4282, - "step": 4650 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.4487, - "step": 4700 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.4348, - "step": 4750 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.4173, - "step": 4800 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.4247, - "step": 4850 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.4088, - "step": 4900 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.4198, - "step": 4950 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.4113, - "step": 5000 - }, - { - "epoch": 0.05, - "eval_loss": 0.8252834677696228, - "eval_runtime": 38.0695, - "eval_samples_per_second": 4.229, - "eval_steps_per_second": 0.079, - "eval_wer": 45.73690621193666, - "step": 5000 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 1.4062, - "step": 5050 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 1.3963, - "step": 5100 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 1.4073, - "step": 5150 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 1.3978, - "step": 5200 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 1.3838, - "step": 5250 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 1.3836, - "step": 5300 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 1.3974, - "step": 5350 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 1.3793, - "step": 5400 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 1.4009, - "step": 5450 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3867, - "step": 5500 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3756, - "step": 5550 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3735, - "step": 5600 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3735, - "step": 5650 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3719, - "step": 5700 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3661, - "step": 5750 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3646, - "step": 5800 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.373, - "step": 5850 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3669, - "step": 5900 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3658, - "step": 5950 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3484, - "step": 6000 - }, - { - "epoch": 0.01, - "eval_loss": 0.7946101427078247, - "eval_runtime": 39.2317, - "eval_samples_per_second": 4.104, - "eval_steps_per_second": 0.076, - "eval_wer": 43.4531059683313, - "step": 6000 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3619, - "step": 6050 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.355, - "step": 6100 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3529, - "step": 6150 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3469, - "step": 6200 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3411, - "step": 6250 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3491, - "step": 6300 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.352, - "step": 6350 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3355, - "step": 6400 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3545, - "step": 6450 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 1.3312, - "step": 6500 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3494, - "step": 6550 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3443, - "step": 6600 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3241, - "step": 6650 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3295, - "step": 6700 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3391, - "step": 6750 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3323, - "step": 6800 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3168, - "step": 6850 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3301, - "step": 6900 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3225, - "step": 6950 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3127, - "step": 7000 - }, - { - "epoch": 0.02, - "eval_loss": 0.7740240097045898, - "eval_runtime": 39.5487, - "eval_samples_per_second": 4.071, - "eval_steps_per_second": 0.076, - "eval_wer": 42.265529841656516, - "step": 7000 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.314, - "step": 7050 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3271, - "step": 7100 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3121, - "step": 7150 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3037, - "step": 7200 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3075, - "step": 7250 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.328, - "step": 7300 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3154, - "step": 7350 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3151, - "step": 7400 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 1.3032, - "step": 7450 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2986, - "step": 7500 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2994, - "step": 7550 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2927, - "step": 7600 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.305, - "step": 7650 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2958, - "step": 7700 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2945, - "step": 7750 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2989, - "step": 7800 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2951, - "step": 7850 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.305, - "step": 7900 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2923, - "step": 7950 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2994, - "step": 8000 - }, - { - "epoch": 0.03, - "eval_loss": 0.7550716996192932, - "eval_runtime": 39.5194, - "eval_samples_per_second": 4.074, - "eval_steps_per_second": 0.076, - "eval_wer": 40.895249695493305, - "step": 8000 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2896, - "step": 8050 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.3027, - "step": 8100 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2868, - "step": 8150 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2963, - "step": 8200 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2774, - "step": 8250 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2754, - "step": 8300 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2844, - "step": 8350 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2806, - "step": 8400 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 1.2889, - "step": 8450 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2804, - "step": 8500 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2835, - "step": 8550 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2706, - "step": 8600 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2696, - "step": 8650 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2776, - "step": 8700 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2686, - "step": 8750 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2566, - "step": 8800 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2762, - "step": 8850 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2805, - "step": 8900 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2679, - "step": 8950 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.265, - "step": 9000 - }, - { - "epoch": 0.04, - "eval_loss": 0.737849235534668, - "eval_runtime": 40.6621, - "eval_samples_per_second": 3.959, - "eval_steps_per_second": 0.074, - "eval_wer": 39.859926918392205, - "step": 9000 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2748, - "step": 9050 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2816, - "step": 9100 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2618, - "step": 9150 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2648, - "step": 9200 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2532, - "step": 9250 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2633, - "step": 9300 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2545, - "step": 9350 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2625, - "step": 9400 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2641, - "step": 9450 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 1.2514, - "step": 9500 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2529, - "step": 9550 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2397, - "step": 9600 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2534, - "step": 9650 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.273, - "step": 9700 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.263, - "step": 9750 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2452, - "step": 9800 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2544, - "step": 9850 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2393, - "step": 9900 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2516, - "step": 9950 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2458, - "step": 10000 - }, - { - "epoch": 0.05, - "eval_loss": 0.7256616353988647, - "eval_runtime": 39.9578, - "eval_samples_per_second": 4.029, - "eval_steps_per_second": 0.075, - "eval_wer": 39.89037758830694, - "step": 10000 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2579, - "step": 10050 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2544, - "step": 10100 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2491, - "step": 10150 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2535, - "step": 10200 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2515, - "step": 10250 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2431, - "step": 10300 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2503, - "step": 10350 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.256, - "step": 10400 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 1.2445, - "step": 10450 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2336, - "step": 10500 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.237, - "step": 10550 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.235, - "step": 10600 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2435, - "step": 10650 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2456, - "step": 10700 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2269, - "step": 10750 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2289, - "step": 10800 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2379, - "step": 10850 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2261, - "step": 10900 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2363, - "step": 10950 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2257, - "step": 11000 - }, - { - "epoch": 0.06, - "eval_loss": 0.7114033102989197, - "eval_runtime": 39.5013, - "eval_samples_per_second": 4.076, - "eval_steps_per_second": 0.076, - "eval_wer": 39.79902557856273, - "step": 11000 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2315, - "step": 11050 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.229, - "step": 11100 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2291, - "step": 11150 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2311, - "step": 11200 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2186, - "step": 11250 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2326, - "step": 11300 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2277, - "step": 11350 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2311, - "step": 11400 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 1.2213, - "step": 11450 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2163, - "step": 11500 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2171, - "step": 11550 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2288, - "step": 11600 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2184, - "step": 11650 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2118, - "step": 11700 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2137, - "step": 11750 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.211, - "step": 11800 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2264, - "step": 11850 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2133, - "step": 11900 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2104, - "step": 11950 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2126, - "step": 12000 - }, - { - "epoch": 0.07, - "eval_loss": 0.6972322463989258, - "eval_runtime": 40.3496, - "eval_samples_per_second": 3.99, - "eval_steps_per_second": 0.074, - "eval_wer": 37.880633373934224, - "step": 12000 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2101, - "step": 12050 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2027, - "step": 12100 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.207, - "step": 12150 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2193, - "step": 12200 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2064, - "step": 12250 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2109, - "step": 12300 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2048, - "step": 12350 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.195, - "step": 12400 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.2037, - "step": 12450 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 1.1998, - "step": 12500 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.1935, - "step": 12550 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.1955, - "step": 12600 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.1994, - "step": 12650 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.2118, - "step": 12700 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.2017, - "step": 12750 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.2088, - "step": 12800 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.1909, - "step": 12850 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.2045, - "step": 12900 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.2031, - "step": 12950 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.1971, - "step": 13000 - }, - { - "epoch": 0.08, - "eval_loss": 0.6871449947357178, - "eval_runtime": 39.2495, - "eval_samples_per_second": 4.102, - "eval_steps_per_second": 0.076, - "eval_wer": 37.3020706455542, - "step": 13000 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.2081, - "step": 13050 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.1995, - "step": 13100 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.1953, - "step": 13150 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.1944, - "step": 13200 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 1.1872, - "step": 13250 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 1.177, - "step": 13300 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 1.1792, - "step": 13350 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 1.1717, - "step": 13400 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 1.1523, - "step": 13450 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 1.1751, - "step": 13500 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 1.1815, - "step": 13550 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 1.183, - "step": 13600 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 1.17, - "step": 13650 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 1.1812, - "step": 13700 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 1.1798, - "step": 13750 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1626, - "step": 13800 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1687, - "step": 13850 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1676, - "step": 13900 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1748, - "step": 13950 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1786, - "step": 14000 - }, - { - "epoch": 1.01, - "eval_loss": 0.6785603761672974, - "eval_runtime": 39.6356, - "eval_samples_per_second": 4.062, - "eval_steps_per_second": 0.076, - "eval_wer": 37.42387332521315, - "step": 14000 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.172, - "step": 14050 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1752, - "step": 14100 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1765, - "step": 14150 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1586, - "step": 14200 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1578, - "step": 14250 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1555, - "step": 14300 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1659, - "step": 14350 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1701, - "step": 14400 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1687, - "step": 14450 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1804, - "step": 14500 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1636, - "step": 14550 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1559, - "step": 14600 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1555, - "step": 14650 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1563, - "step": 14700 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 1.1508, - "step": 14750 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1454, - "step": 14800 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1555, - "step": 14850 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1627, - "step": 14900 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1578, - "step": 14950 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1486, - "step": 15000 - }, - { - "epoch": 1.02, - "eval_loss": 0.6702972054481506, - "eval_runtime": 43.4585, - "eval_samples_per_second": 3.705, - "eval_steps_per_second": 0.069, - "eval_wer": 36.99756394640682, - "step": 15000 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1655, - "step": 15050 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1649, - "step": 15100 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1609, - "step": 15150 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1516, - "step": 15200 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1635, - "step": 15250 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1528, - "step": 15300 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1688, - "step": 15350 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1508, - "step": 15400 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1511, - "step": 15450 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1354, - "step": 15500 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1562, - "step": 15550 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1523, - "step": 15600 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1469, - "step": 15650 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1458, - "step": 15700 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 1.1455, - "step": 15750 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1444, - "step": 15800 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1582, - "step": 15850 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1447, - "step": 15900 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1494, - "step": 15950 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1505, - "step": 16000 - }, - { - "epoch": 1.03, - "eval_loss": 0.6647058129310608, - "eval_runtime": 39.1012, - "eval_samples_per_second": 4.118, - "eval_steps_per_second": 0.077, - "eval_wer": 36.35809987819732, - "step": 16000 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.148, - "step": 16050 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.151, - "step": 16100 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.149, - "step": 16150 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1413, - "step": 16200 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1398, - "step": 16250 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1406, - "step": 16300 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1335, - "step": 16350 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1326, - "step": 16400 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1425, - "step": 16450 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.148, - "step": 16500 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1368, - "step": 16550 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1308, - "step": 16600 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1298, - "step": 16650 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1352, - "step": 16700 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 1.1327, - "step": 16750 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1409, - "step": 16800 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1263, - "step": 16850 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1304, - "step": 16900 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1298, - "step": 16950 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1238, - "step": 17000 - }, - { - "epoch": 1.04, - "eval_loss": 0.6559091210365295, - "eval_runtime": 39.2532, - "eval_samples_per_second": 4.102, - "eval_steps_per_second": 0.076, - "eval_wer": 36.38855054811206, - "step": 17000 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1387, - "step": 17050 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1357, - "step": 17100 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1308, - "step": 17150 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1327, - "step": 17200 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1278, - "step": 17250 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1384, - "step": 17300 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1433, - "step": 17350 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1354, - "step": 17400 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1356, - "step": 17450 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1271, - "step": 17500 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1263, - "step": 17550 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1367, - "step": 17600 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1222, - "step": 17650 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1357, - "step": 17700 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 1.1127, - "step": 17750 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1303, - "step": 17800 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1174, - "step": 17850 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1407, - "step": 17900 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1321, - "step": 17950 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1184, - "step": 18000 - }, - { - "epoch": 1.05, - "eval_loss": 0.6509166955947876, - "eval_runtime": 39.722, - "eval_samples_per_second": 4.053, - "eval_steps_per_second": 0.076, - "eval_wer": 36.510353227771006, - "step": 18000 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1373, - "step": 18050 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1208, - "step": 18100 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1138, - "step": 18150 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1173, - "step": 18200 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1235, - "step": 18250 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1106, - "step": 18300 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1157, - "step": 18350 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1321, - "step": 18400 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1391, - "step": 18450 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.112, - "step": 18500 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1163, - "step": 18550 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1226, - "step": 18600 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1053, - "step": 18650 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1184, - "step": 18700 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 1.1134, - "step": 18750 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1112, - "step": 18800 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1194, - "step": 18850 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1184, - "step": 18900 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1182, - "step": 18950 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.115, - "step": 19000 - }, - { - "epoch": 1.06, - "eval_loss": 0.6451593041419983, - "eval_runtime": 41.32, - "eval_samples_per_second": 3.896, - "eval_steps_per_second": 0.073, - "eval_wer": 35.99269183922046, - "step": 19000 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.121, - "step": 19050 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1133, - "step": 19100 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1041, - "step": 19150 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1116, - "step": 19200 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.112, - "step": 19250 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.112, - "step": 19300 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1122, - "step": 19350 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1059, - "step": 19400 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1087, - "step": 19450 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1146, - "step": 19500 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1068, - "step": 19550 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1058, - "step": 19600 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.112, - "step": 19650 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.0969, - "step": 19700 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 1.1134, - "step": 19750 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.0999, - "step": 19800 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.1006, - "step": 19850 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.1019, - "step": 19900 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.1105, - "step": 19950 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.1013, - "step": 20000 - }, - { - "epoch": 1.07, - "eval_loss": 0.6382384896278381, - "eval_runtime": 40.3978, - "eval_samples_per_second": 3.985, - "eval_steps_per_second": 0.074, - "eval_wer": 34.50060901339829, - "step": 20000 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.121, - "step": 20050 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.1122, - "step": 20100 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.1039, - "step": 20150 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.1013, - "step": 20200 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.1097, - "step": 20250 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.1002, - "step": 20300 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.0965, - "step": 20350 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.0903, - "step": 20400 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.1016, - "step": 20450 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.0923, - "step": 20500 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.0983, - "step": 20550 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.1011, - "step": 20600 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.0963, - "step": 20650 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.0952, - "step": 20700 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 1.0972, - "step": 20750 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.1087, - "step": 20800 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.0958, - "step": 20850 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.0902, - "step": 20900 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.0958, - "step": 20950 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.0969, - "step": 21000 - }, - { - "epoch": 1.08, - "eval_loss": 0.633127748966217, - "eval_runtime": 40.5043, - "eval_samples_per_second": 3.975, - "eval_steps_per_second": 0.074, - "eval_wer": 34.348355663824606, - "step": 21000 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.0988, - "step": 21050 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.0947, - "step": 21100 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.0964, - "step": 21150 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.0977, - "step": 21200 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.0986, - "step": 21250 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.1004, - "step": 21300 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.0951, - "step": 21350 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.086, - "step": 21400 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.0905, - "step": 21450 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.0967, - "step": 21500 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 1.079, - "step": 21550 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 1.0978, - "step": 21600 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 1.0779, - "step": 21650 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 1.0855, - "step": 21700 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 1.0775, - "step": 21750 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 1.068, - "step": 21800 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 1.0789, - "step": 21850 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 1.0748, - "step": 21900 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 1.075, - "step": 21950 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 1.0784, - "step": 22000 - }, - { - "epoch": 2.0, - "eval_loss": 0.6303825974464417, - "eval_runtime": 38.4993, - "eval_samples_per_second": 4.182, - "eval_steps_per_second": 0.078, - "eval_wer": 34.28745432399513, - "step": 22000 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 1.0746, - "step": 22050 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0761, - "step": 22100 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0793, - "step": 22150 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0814, - "step": 22200 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0801, - "step": 22250 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0791, - "step": 22300 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0718, - "step": 22350 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0855, - "step": 22400 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0949, - "step": 22450 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0627, - "step": 22500 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0873, - "step": 22550 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0698, - "step": 22600 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0667, - "step": 22650 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0735, - "step": 22700 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0821, - "step": 22750 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0794, - "step": 22800 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0646, - "step": 22850 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0684, - "step": 22900 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0673, - "step": 22950 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.0774, - "step": 23000 - }, - { - "epoch": 2.01, - "eval_loss": 0.6248754262924194, - "eval_runtime": 38.6299, - "eval_samples_per_second": 4.168, - "eval_steps_per_second": 0.078, - "eval_wer": 34.104750304506695, - "step": 23000 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 1.065, - "step": 23050 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0757, - "step": 23100 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0676, - "step": 23150 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0604, - "step": 23200 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.071, - "step": 23250 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0778, - "step": 23300 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.063, - "step": 23350 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0705, - "step": 23400 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0639, - "step": 23450 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.057, - "step": 23500 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0617, - "step": 23550 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0623, - "step": 23600 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0778, - "step": 23650 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0687, - "step": 23700 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0626, - "step": 23750 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0781, - "step": 23800 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0579, - "step": 23850 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0624, - "step": 23900 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0719, - "step": 23950 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0719, - "step": 24000 - }, - { - "epoch": 2.02, - "eval_loss": 0.6194329857826233, - "eval_runtime": 39.0738, - "eval_samples_per_second": 4.12, - "eval_steps_per_second": 0.077, - "eval_wer": 33.830694275274055, - "step": 24000 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 1.0779, - "step": 24050 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0651, - "step": 24100 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0654, - "step": 24150 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0647, - "step": 24200 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0693, - "step": 24250 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0682, - "step": 24300 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0733, - "step": 24350 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0597, - "step": 24400 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0737, - "step": 24450 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0547, - "step": 24500 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.063, - "step": 24550 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0582, - "step": 24600 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.06, - "step": 24650 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0713, - "step": 24700 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0699, - "step": 24750 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0552, - "step": 24800 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0566, - "step": 24850 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.048, - "step": 24900 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0529, - "step": 24950 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0638, - "step": 25000 - }, - { - "epoch": 2.03, - "eval_loss": 0.6157576441764832, - "eval_runtime": 39.7564, - "eval_samples_per_second": 4.05, - "eval_steps_per_second": 0.075, - "eval_wer": 32.97807551766139, - "step": 25000 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 1.0555, - "step": 25050 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0485, - "step": 25100 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0554, - "step": 25150 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.061, - "step": 25200 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0582, - "step": 25250 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0426, - "step": 25300 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0551, - "step": 25350 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0577, - "step": 25400 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0811, - "step": 25450 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0541, - "step": 25500 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0446, - "step": 25550 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0642, - "step": 25600 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0492, - "step": 25650 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0469, - "step": 25700 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.052, - "step": 25750 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0534, - "step": 25800 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0457, - "step": 25850 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0459, - "step": 25900 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0684, - "step": 25950 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0592, - "step": 26000 - }, - { - "epoch": 2.04, - "eval_loss": 0.610471785068512, - "eval_runtime": 40.4875, - "eval_samples_per_second": 3.977, - "eval_steps_per_second": 0.074, - "eval_wer": 32.64311814859927, - "step": 26000 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 1.0649, - "step": 26050 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0508, - "step": 26100 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0518, - "step": 26150 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0587, - "step": 26200 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.05, - "step": 26250 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0449, - "step": 26300 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0514, - "step": 26350 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0542, - "step": 26400 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0587, - "step": 26450 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0566, - "step": 26500 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0393, - "step": 26550 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0537, - "step": 26600 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0457, - "step": 26650 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0475, - "step": 26700 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0591, - "step": 26750 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0476, - "step": 26800 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0537, - "step": 26850 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0414, - "step": 26900 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0596, - "step": 26950 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.0493, - "step": 27000 - }, - { - "epoch": 2.05, - "eval_loss": 0.6040655970573425, - "eval_runtime": 38.168, - "eval_samples_per_second": 4.218, - "eval_steps_per_second": 0.079, - "eval_wer": 32.734470158343484, - "step": 27000 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 1.048, - "step": 27050 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0542, - "step": 27100 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0474, - "step": 27150 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0424, - "step": 27200 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0337, - "step": 27250 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0424, - "step": 27300 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0342, - "step": 27350 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0401, - "step": 27400 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0388, - "step": 27450 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0323, - "step": 27500 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0361, - "step": 27550 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0322, - "step": 27600 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0474, - "step": 27650 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.05, - "step": 27700 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0501, - "step": 27750 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0425, - "step": 27800 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0421, - "step": 27850 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0453, - "step": 27900 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0352, - "step": 27950 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.047, - "step": 28000 - }, - { - "epoch": 2.06, - "eval_loss": 0.6039990186691284, - "eval_runtime": 39.0034, - "eval_samples_per_second": 4.128, - "eval_steps_per_second": 0.077, - "eval_wer": 32.76492082825822, - "step": 28000 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 1.0366, - "step": 28050 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0588, - "step": 28100 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0429, - "step": 28150 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0461, - "step": 28200 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0347, - "step": 28250 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.038, - "step": 28300 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0361, - "step": 28350 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0226, - "step": 28400 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0337, - "step": 28450 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0351, - "step": 28500 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0279, - "step": 28550 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0411, - "step": 28600 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0384, - "step": 28650 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0434, - "step": 28700 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0414, - "step": 28750 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0305, - "step": 28800 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0301, - "step": 28850 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0293, - "step": 28900 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0324, - "step": 28950 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0323, - "step": 29000 - }, - { - "epoch": 2.07, - "eval_loss": 0.5984179377555847, - "eval_runtime": 38.0029, - "eval_samples_per_second": 4.237, - "eval_steps_per_second": 0.079, - "eval_wer": 31.60779537149817, - "step": 29000 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 1.0444, - "step": 29050 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0328, - "step": 29100 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0334, - "step": 29150 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0332, - "step": 29200 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0297, - "step": 29250 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0441, - "step": 29300 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0319, - "step": 29350 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0454, - "step": 29400 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0286, - "step": 29450 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0227, - "step": 29500 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0299, - "step": 29550 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0317, - "step": 29600 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0227, - "step": 29650 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0374, - "step": 29700 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0352, - "step": 29750 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0176, - "step": 29800 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 1.0285, - "step": 29850 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 1.0243, - "step": 29900 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 1.0192, - "step": 29950 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 1.0189, - "step": 30000 - }, - { - "epoch": 3.0, - "eval_loss": 0.5957360863685608, - "eval_runtime": 30.8132, - "eval_samples_per_second": 5.225, - "eval_steps_per_second": 0.097, - "eval_wer": 31.303288672350792, - "step": 30000 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 1.0133, - "step": 30050 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 1.0144, - "step": 30100 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 1.0279, - "step": 30150 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 1.0148, - "step": 30200 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 1.0192, - "step": 30250 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 1.0246, - "step": 30300 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 1.026, - "step": 30350 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0173, - "step": 30400 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0273, - "step": 30450 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0306, - "step": 30500 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0115, - "step": 30550 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0172, - "step": 30600 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0068, - "step": 30650 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0066, - "step": 30700 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0204, - "step": 30750 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0206, - "step": 30800 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0158, - "step": 30850 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0117, - "step": 30900 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0096, - "step": 30950 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0078, - "step": 31000 - }, - { - "epoch": 3.01, - "eval_loss": 0.5924085378646851, - "eval_runtime": 31.4548, - "eval_samples_per_second": 5.118, - "eval_steps_per_second": 0.095, - "eval_wer": 31.425091352009744, - "step": 31000 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0176, - "step": 31050 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0132, - "step": 31100 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.02, - "step": 31150 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0144, - "step": 31200 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0031, - "step": 31250 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0067, - "step": 31300 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 1.0025, - "step": 31350 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0099, - "step": 31400 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0186, - "step": 31450 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0235, - "step": 31500 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.015, - "step": 31550 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0154, - "step": 31600 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0117, - "step": 31650 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0027, - "step": 31700 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0134, - "step": 31750 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0055, - "step": 31800 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0027, - "step": 31850 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0099, - "step": 31900 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0037, - "step": 31950 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0146, - "step": 32000 - }, - { - "epoch": 3.02, - "eval_loss": 0.594041645526886, - "eval_runtime": 32.4443, - "eval_samples_per_second": 4.962, - "eval_steps_per_second": 0.092, - "eval_wer": 31.303288672350792, - "step": 32000 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0069, - "step": 32050 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0082, - "step": 32100 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0154, - "step": 32150 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0227, - "step": 32200 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.9945, - "step": 32250 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0048, - "step": 32300 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 1.0101, - "step": 32350 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.9971, - "step": 32400 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0, - "step": 32450 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.006, - "step": 32500 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0083, - "step": 32550 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.006, - "step": 32600 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0073, - "step": 32650 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0048, - "step": 32700 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0015, - "step": 32750 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0101, - "step": 32800 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.01, - "step": 32850 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0133, - "step": 32900 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0069, - "step": 32950 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0128, - "step": 33000 - }, - { - "epoch": 3.03, - "eval_loss": 0.5891727805137634, - "eval_runtime": 32.6258, - "eval_samples_per_second": 4.935, - "eval_steps_per_second": 0.092, - "eval_wer": 31.02923264311815, - "step": 33000 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0075, - "step": 33050 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0175, - "step": 33100 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0083, - "step": 33150 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.9995, - "step": 33200 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.9996, - "step": 33250 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0072, - "step": 33300 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 1.0196, - "step": 33350 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0071, - "step": 33400 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0073, - "step": 33450 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.012, - "step": 33500 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0137, - "step": 33550 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.9993, - "step": 33600 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0025, - "step": 33650 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0055, - "step": 33700 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0013, - "step": 33750 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0068, - "step": 33800 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0018, - "step": 33850 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.9998, - "step": 33900 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.9934, - "step": 33950 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0025, - "step": 34000 - }, - { - "epoch": 3.04, - "eval_loss": 0.5873314738273621, - "eval_runtime": 32.6141, - "eval_samples_per_second": 4.937, - "eval_steps_per_second": 0.092, - "eval_wer": 31.181485992691837, - "step": 34000 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0072, - "step": 34050 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.9934, - "step": 34100 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0179, - "step": 34150 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.9992, - "step": 34200 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.9973, - "step": 34250 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0184, - "step": 34300 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 1.0049, - "step": 34350 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.009, - "step": 34400 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.0099, - "step": 34450 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.0086, - "step": 34500 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.0038, - "step": 34550 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.9973, - "step": 34600 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.0189, - "step": 34650 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.0017, - "step": 34700 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.0013, - "step": 34750 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.9962, - "step": 34800 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.0062, - "step": 34850 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.9956, - "step": 34900 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.9908, - "step": 34950 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.999, - "step": 35000 - }, - { - "epoch": 3.05, - "eval_loss": 0.5838064551353455, - "eval_runtime": 29.7975, - "eval_samples_per_second": 5.403, - "eval_steps_per_second": 0.101, - "eval_wer": 30.633373934226555, - "step": 35000 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.0019, - "step": 35050 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.0049, - "step": 35100 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.0116, - "step": 35150 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.0041, - "step": 35200 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.0047, - "step": 35250 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 1.003, - "step": 35300 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.9984, - "step": 35350 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 1.0072, - "step": 35400 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.9937, - "step": 35450 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.9983, - "step": 35500 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 1.0007, - "step": 35550 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.998, - "step": 35600 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.9986, - "step": 35650 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 1.0009, - "step": 35700 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.9982, - "step": 35750 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 1.0009, - "step": 35800 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.9946, - "step": 35850 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 1.0014, - "step": 35900 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 1.0009, - "step": 35950 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 1.0045, - "step": 36000 - }, - { - "epoch": 3.06, - "eval_loss": 0.5799316167831421, - "eval_runtime": 35.1205, - "eval_samples_per_second": 4.584, - "eval_steps_per_second": 0.085, - "eval_wer": 30.420219244823386, - "step": 36000 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.9923, - "step": 36050 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.9973, - "step": 36100 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.9879, - "step": 36150 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.9857, - "step": 36200 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.9931, - "step": 36250 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.9926, - "step": 36300 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.9892, - "step": 36350 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.982, - "step": 36400 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9962, - "step": 36450 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9871, - "step": 36500 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9868, - "step": 36550 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 1.0011, - "step": 36600 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9873, - "step": 36650 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9946, - "step": 36700 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 1.004, - "step": 36750 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9963, - "step": 36800 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9822, - "step": 36850 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9949, - "step": 36900 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9866, - "step": 36950 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 1.0005, - "step": 37000 - }, - { - "epoch": 3.07, - "eval_loss": 0.5770267248153687, - "eval_runtime": 32.6377, - "eval_samples_per_second": 4.933, - "eval_steps_per_second": 0.092, - "eval_wer": 30.176613885505482, - "step": 37000 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9882, - "step": 37050 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9887, - "step": 37100 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9863, - "step": 37150 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9874, - "step": 37200 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.996, - "step": 37250 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 1.0031, - "step": 37300 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.9861, - "step": 37350 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.9843, - "step": 37400 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.9873, - "step": 37450 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.9939, - "step": 37500 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.9777, - "step": 37550 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.9785, - "step": 37600 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 1.0, - "step": 37650 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.9911, - "step": 37700 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.9952, - "step": 37750 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.9814, - "step": 37800 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.985, - "step": 37850 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.9881, - "step": 37900 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.9882, - "step": 37950 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 1.0017, - "step": 38000 - }, - { - "epoch": 3.08, - "eval_loss": 0.5733200907707214, - "eval_runtime": 38.0152, - "eval_samples_per_second": 4.235, - "eval_steps_per_second": 0.079, - "eval_wer": 29.65895249695493, - "step": 38000 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.987, - "step": 38050 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.9886, - "step": 38100 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.9859, - "step": 38150 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.9752, - "step": 38200 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.9817, - "step": 38250 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.9751, - "step": 38300 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.9714, - "step": 38350 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.9681, - "step": 38400 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.9914, - "step": 38450 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.9775, - "step": 38500 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.9779, - "step": 38550 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.9697, - "step": 38600 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.979, - "step": 38650 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9747, - "step": 38700 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9777, - "step": 38750 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.973, - "step": 38800 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9783, - "step": 38850 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9719, - "step": 38900 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9732, - "step": 38950 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9878, - "step": 39000 - }, - { - "epoch": 4.01, - "eval_loss": 0.5744524002075195, - "eval_runtime": 41.2775, - "eval_samples_per_second": 3.9, - "eval_steps_per_second": 0.073, - "eval_wer": 30.267965895249695, - "step": 39000 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9827, - "step": 39050 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9796, - "step": 39100 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9726, - "step": 39150 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.984, - "step": 39200 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9739, - "step": 39250 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9885, - "step": 39300 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9724, - "step": 39350 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9701, - "step": 39400 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9749, - "step": 39450 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.965, - "step": 39500 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9805, - "step": 39550 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9829, - "step": 39600 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.9782, - "step": 39650 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9672, - "step": 39700 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9763, - "step": 39750 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9795, - "step": 39800 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9731, - "step": 39850 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9693, - "step": 39900 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9708, - "step": 39950 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9854, - "step": 40000 - }, - { - "epoch": 4.02, - "eval_loss": 0.5719765424728394, - "eval_runtime": 38.4572, - "eval_samples_per_second": 4.186, - "eval_steps_per_second": 0.078, - "eval_wer": 30.054811205846526, - "step": 40000 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9628, - "step": 40050 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9651, - "step": 40100 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9627, - "step": 40150 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9572, - "step": 40200 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9681, - "step": 40250 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9642, - "step": 40300 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9817, - "step": 40350 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9654, - "step": 40400 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9799, - "step": 40450 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.973, - "step": 40500 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9749, - "step": 40550 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9651, - "step": 40600 - }, - { - "epoch": 4.02, - "learning_rate": 3e-06, - "loss": 0.9686, - "step": 40650 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9808, - "step": 40700 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9778, - "step": 40750 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9736, - "step": 40800 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9767, - "step": 40850 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9866, - "step": 40900 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9708, - "step": 40950 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9624, - "step": 41000 - }, - { - "epoch": 4.03, - "eval_loss": 0.5703173279762268, - "eval_runtime": 39.8395, - "eval_samples_per_second": 4.041, - "eval_steps_per_second": 0.075, - "eval_wer": 29.598051157125454, - "step": 41000 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.975, - "step": 41050 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9664, - "step": 41100 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9583, - "step": 41150 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9644, - "step": 41200 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.967, - "step": 41250 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9632, - "step": 41300 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9534, - "step": 41350 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.967, - "step": 41400 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9712, - "step": 41450 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.966, - "step": 41500 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9675, - "step": 41550 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9728, - "step": 41600 - }, - { - "epoch": 4.03, - "learning_rate": 3e-06, - "loss": 0.9588, - "step": 41650 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9715, - "step": 41700 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9731, - "step": 41750 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9717, - "step": 41800 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9789, - "step": 41850 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9653, - "step": 41900 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9616, - "step": 41950 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9639, - "step": 42000 - }, - { - "epoch": 4.04, - "eval_loss": 0.5681419968605042, - "eval_runtime": 39.2115, - "eval_samples_per_second": 4.106, - "eval_steps_per_second": 0.077, - "eval_wer": 29.506699147381244, - "step": 42000 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.956, - "step": 42050 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9585, - "step": 42100 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9598, - "step": 42150 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9657, - "step": 42200 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.975, - "step": 42250 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9592, - "step": 42300 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9659, - "step": 42350 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9548, - "step": 42400 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9556, - "step": 42450 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9593, - "step": 42500 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9671, - "step": 42550 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9665, - "step": 42600 - }, - { - "epoch": 4.04, - "learning_rate": 3e-06, - "loss": 0.9625, - "step": 42650 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.962, - "step": 42700 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9502, - "step": 42750 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9677, - "step": 42800 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9566, - "step": 42850 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9614, - "step": 42900 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9663, - "step": 42950 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9569, - "step": 43000 - }, - { - "epoch": 4.05, - "eval_loss": 0.5678820013999939, - "eval_runtime": 39.7895, - "eval_samples_per_second": 4.046, - "eval_steps_per_second": 0.075, - "eval_wer": 29.628501827040193, - "step": 43000 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9676, - "step": 43050 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.966, - "step": 43100 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9577, - "step": 43150 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9658, - "step": 43200 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9663, - "step": 43250 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9638, - "step": 43300 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9635, - "step": 43350 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9631, - "step": 43400 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9602, - "step": 43450 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9619, - "step": 43500 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9521, - "step": 43550 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.96, - "step": 43600 - }, - { - "epoch": 4.05, - "learning_rate": 3e-06, - "loss": 0.9672, - "step": 43650 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9528, - "step": 43700 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9627, - "step": 43750 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9595, - "step": 43800 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9695, - "step": 43850 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9641, - "step": 43900 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.958, - "step": 43950 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9682, - "step": 44000 - }, - { - "epoch": 4.06, - "eval_loss": 0.5643439292907715, - "eval_runtime": 39.2385, - "eval_samples_per_second": 4.103, - "eval_steps_per_second": 0.076, - "eval_wer": 29.567600487210722, - "step": 44000 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9527, - "step": 44050 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.96, - "step": 44100 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9641, - "step": 44150 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9748, - "step": 44200 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9659, - "step": 44250 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9565, - "step": 44300 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9606, - "step": 44350 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9561, - "step": 44400 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9578, - "step": 44450 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9665, - "step": 44500 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9657, - "step": 44550 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9535, - "step": 44600 - }, - { - "epoch": 4.06, - "learning_rate": 3e-06, - "loss": 0.9457, - "step": 44650 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9564, - "step": 44700 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9508, - "step": 44750 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9576, - "step": 44800 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9515, - "step": 44850 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9616, - "step": 44900 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9585, - "step": 44950 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9539, - "step": 45000 - }, - { - "epoch": 4.07, - "eval_loss": 0.5601379871368408, - "eval_runtime": 39.9689, - "eval_samples_per_second": 4.028, - "eval_steps_per_second": 0.075, - "eval_wer": 29.567600487210722, - "step": 45000 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9527, - "step": 45050 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9553, - "step": 45100 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.962, - "step": 45150 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.96, - "step": 45200 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9679, - "step": 45250 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9522, - "step": 45300 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9536, - "step": 45350 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9557, - "step": 45400 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9501, - "step": 45450 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9559, - "step": 45500 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9539, - "step": 45550 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9556, - "step": 45600 - }, - { - "epoch": 4.07, - "learning_rate": 3e-06, - "loss": 0.9492, - "step": 45650 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9678, - "step": 45700 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9634, - "step": 45750 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9571, - "step": 45800 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9508, - "step": 45850 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9468, - "step": 45900 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9511, - "step": 45950 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.946, - "step": 46000 - }, - { - "epoch": 4.08, - "eval_loss": 0.5562007427215576, - "eval_runtime": 40.2757, - "eval_samples_per_second": 3.997, - "eval_steps_per_second": 0.074, - "eval_wer": 29.71985383678441, - "step": 46000 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.954, - "step": 46050 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9573, - "step": 46100 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9508, - "step": 46150 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9633, - "step": 46200 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9547, - "step": 46250 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9515, - "step": 46300 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9551, - "step": 46350 - }, - { - "epoch": 4.08, - "learning_rate": 3e-06, - "loss": 0.9544, - "step": 46400 - }, - { - "epoch": 5.0, - "learning_rate": 3e-06, - "loss": 0.9554, - "step": 46450 - }, - { - "epoch": 5.0, - "learning_rate": 3e-06, - "loss": 0.9445, - "step": 46500 - }, - { - "epoch": 5.0, - "learning_rate": 3e-06, - "loss": 0.9536, - "step": 46550 - }, - { - "epoch": 5.0, - "learning_rate": 3e-06, - "loss": 0.9375, - "step": 46600 - }, - { - "epoch": 5.0, - "learning_rate": 3e-06, - "loss": 0.9414, - "step": 46650 - }, - { - "epoch": 5.0, - "learning_rate": 3e-06, - "loss": 0.9306, - "step": 46700 - }, - { - "epoch": 5.0, - "learning_rate": 3e-06, - "loss": 0.938, - "step": 46750 - }, - { - "epoch": 5.0, - "learning_rate": 3e-06, - "loss": 0.9453, - "step": 46800 - }, - { - "epoch": 5.0, - "learning_rate": 3e-06, - "loss": 0.9366, - "step": 46850 - }, - { - "epoch": 5.0, - "learning_rate": 3e-06, - "loss": 0.935, - "step": 46900 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9403, - "step": 46950 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9429, - "step": 47000 - }, - { - "epoch": 5.01, - "eval_loss": 0.5591687560081482, - "eval_runtime": 38.1863, - "eval_samples_per_second": 4.216, - "eval_steps_per_second": 0.079, - "eval_wer": 29.293544457978076, - "step": 47000 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9423, - "step": 47050 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9492, - "step": 47100 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9528, - "step": 47150 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9372, - "step": 47200 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9502, - "step": 47250 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9487, - "step": 47300 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.942, - "step": 47350 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9428, - "step": 47400 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9332, - "step": 47450 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.931, - "step": 47500 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.939, - "step": 47550 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9487, - "step": 47600 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9494, - "step": 47650 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9377, - "step": 47700 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9346, - "step": 47750 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9398, - "step": 47800 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.9444, - "step": 47850 - }, - { - "epoch": 5.01, - "learning_rate": 3e-06, - "loss": 0.938, - "step": 47900 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9552, - "step": 47950 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9462, - "step": 48000 - }, - { - "epoch": 5.02, - "eval_loss": 0.553955078125, - "eval_runtime": 38.7517, - "eval_samples_per_second": 4.155, - "eval_steps_per_second": 0.077, - "eval_wer": 29.08038976857491, - "step": 48000 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9412, - "step": 48050 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9428, - "step": 48100 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.938, - "step": 48150 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.944, - "step": 48200 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9342, - "step": 48250 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9285, - "step": 48300 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9444, - "step": 48350 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9451, - "step": 48400 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.947, - "step": 48450 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9345, - "step": 48500 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9345, - "step": 48550 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9345, - "step": 48600 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9357, - "step": 48650 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9329, - "step": 48700 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.939, - "step": 48750 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.928, - "step": 48800 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9421, - "step": 48850 - }, - { - "epoch": 5.02, - "learning_rate": 3e-06, - "loss": 0.9341, - "step": 48900 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9404, - "step": 48950 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9312, - "step": 49000 - }, - { - "epoch": 5.03, - "eval_loss": 0.5535339713096619, - "eval_runtime": 37.3366, - "eval_samples_per_second": 4.312, - "eval_steps_per_second": 0.08, - "eval_wer": 29.293544457978076, - "step": 49000 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9387, - "step": 49050 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9342, - "step": 49100 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9565, - "step": 49150 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9475, - "step": 49200 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9436, - "step": 49250 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9334, - "step": 49300 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9391, - "step": 49350 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9501, - "step": 49400 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9388, - "step": 49450 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9229, - "step": 49500 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9357, - "step": 49550 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9329, - "step": 49600 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.941, - "step": 49650 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9445, - "step": 49700 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9346, - "step": 49750 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9375, - "step": 49800 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9466, - "step": 49850 - }, - { - "epoch": 5.03, - "learning_rate": 3e-06, - "loss": 0.9282, - "step": 49900 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9481, - "step": 49950 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9462, - "step": 50000 - }, - { - "epoch": 5.04, - "eval_loss": 0.5536479949951172, - "eval_runtime": 40.3035, - "eval_samples_per_second": 3.995, - "eval_steps_per_second": 0.074, - "eval_wer": 28.68453105968331, - "step": 50000 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9388, - "step": 50050 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9368, - "step": 50100 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9338, - "step": 50150 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9355, - "step": 50200 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9375, - "step": 50250 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9436, - "step": 50300 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9347, - "step": 50350 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9285, - "step": 50400 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9388, - "step": 50450 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9442, - "step": 50500 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9356, - "step": 50550 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9309, - "step": 50600 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9289, - "step": 50650 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9342, - "step": 50700 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9484, - "step": 50750 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9295, - "step": 50800 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9277, - "step": 50850 - }, - { - "epoch": 5.04, - "learning_rate": 3e-06, - "loss": 0.9389, - "step": 50900 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9365, - "step": 50950 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.922, - "step": 51000 - }, - { - "epoch": 5.05, - "eval_loss": 0.5538690686225891, - "eval_runtime": 37.226, - "eval_samples_per_second": 4.325, - "eval_steps_per_second": 0.081, - "eval_wer": 28.714981729598048, - "step": 51000 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9281, - "step": 51050 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9453, - "step": 51100 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9445, - "step": 51150 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9396, - "step": 51200 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9404, - "step": 51250 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9301, - "step": 51300 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9286, - "step": 51350 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9303, - "step": 51400 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9348, - "step": 51450 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9356, - "step": 51500 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9283, - "step": 51550 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9432, - "step": 51600 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9416, - "step": 51650 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9319, - "step": 51700 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.925, - "step": 51750 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.95, - "step": 51800 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9148, - "step": 51850 - }, - { - "epoch": 5.05, - "learning_rate": 3e-06, - "loss": 0.9348, - "step": 51900 - }, - { - "epoch": 5.06, - "learning_rate": 3e-06, - "loss": 0.9395, - "step": 51950 - }, - { - "epoch": 5.06, - "learning_rate": 3e-06, - "loss": 0.9253, - "step": 52000 - }, - { - "epoch": 5.06, - "eval_loss": 0.5509808659553528, - "eval_runtime": 35.2782, - "eval_samples_per_second": 4.564, - "eval_steps_per_second": 0.085, - "eval_wer": 28.836784409257003, - "step": 52000 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.9213, - "step": 52050 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.9149, - "step": 52100 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.9275, - "step": 52150 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.9333, - "step": 52200 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.9209, - "step": 52250 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.9104, - "step": 52300 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.9305, - "step": 52350 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.9208, - "step": 52400 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.9311, - "step": 52450 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9315, - "step": 52500 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9281, - "step": 52550 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9199, - "step": 52600 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9235, - "step": 52650 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.934, - "step": 52700 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.916, - "step": 52750 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9148, - "step": 52800 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9259, - "step": 52850 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9238, - "step": 52900 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9224, - "step": 52950 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9065, - "step": 53000 - }, - { - "epoch": 0.01, - "eval_loss": 0.5493320226669312, - "eval_runtime": 40.3992, - "eval_samples_per_second": 3.985, - "eval_steps_per_second": 0.074, - "eval_wer": 28.5931790499391, - "step": 53000 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9211, - "step": 53050 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9196, - "step": 53100 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9215, - "step": 53150 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9177, - "step": 53200 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9131, - "step": 53250 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9228, - "step": 53300 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9292, - "step": 53350 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9221, - "step": 53400 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9316, - "step": 53450 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.9125, - "step": 53500 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9298, - "step": 53550 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9227, - "step": 53600 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9116, - "step": 53650 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9297, - "step": 53700 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.933, - "step": 53750 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9221, - "step": 53800 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.919, - "step": 53850 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9263, - "step": 53900 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9159, - "step": 53950 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9096, - "step": 54000 - }, - { - "epoch": 0.02, - "eval_loss": 0.5489500164985657, - "eval_runtime": 39.6766, - "eval_samples_per_second": 4.058, - "eval_steps_per_second": 0.076, - "eval_wer": 28.501827040194883, - "step": 54000 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9126, - "step": 54050 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9237, - "step": 54100 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9134, - "step": 54150 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9093, - "step": 54200 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9241, - "step": 54250 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9302, - "step": 54300 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9209, - "step": 54350 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9197, - "step": 54400 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.9119, - "step": 54450 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9085, - "step": 54500 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9109, - "step": 54550 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9121, - "step": 54600 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.927, - "step": 54650 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9125, - "step": 54700 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9125, - "step": 54750 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.917, - "step": 54800 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9195, - "step": 54850 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9271, - "step": 54900 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9256, - "step": 54950 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9329, - "step": 55000 - }, - { - "epoch": 0.03, - "eval_loss": 0.5482864379882812, - "eval_runtime": 37.1746, - "eval_samples_per_second": 4.331, - "eval_steps_per_second": 0.081, - "eval_wer": 28.288672350791717, - "step": 55000 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9241, - "step": 55050 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9263, - "step": 55100 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9189, - "step": 55150 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9318, - "step": 55200 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9092, - "step": 55250 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9066, - "step": 55300 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9141, - "step": 55350 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9221, - "step": 55400 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9318, - "step": 55450 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9198, - "step": 55500 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9211, - "step": 55550 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9102, - "step": 55600 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9127, - "step": 55650 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9206, - "step": 55700 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9129, - "step": 55750 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.913, - "step": 55800 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9265, - "step": 55850 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9235, - "step": 55900 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9131, - "step": 55950 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9181, - "step": 56000 - }, - { - "epoch": 0.04, - "eval_loss": 0.5471073985099792, - "eval_runtime": 39.0291, - "eval_samples_per_second": 4.125, - "eval_steps_per_second": 0.077, - "eval_wer": 27.98416565164434, - "step": 56000 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.932, - "step": 56050 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9324, - "step": 56100 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9159, - "step": 56150 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9267, - "step": 56200 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9172, - "step": 56250 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9271, - "step": 56300 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.918, - "step": 56350 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9301, - "step": 56400 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.931, - "step": 56450 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.9095, - "step": 56500 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9182, - "step": 56550 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9115, - "step": 56600 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9187, - "step": 56650 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9336, - "step": 56700 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9269, - "step": 56750 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.915, - "step": 56800 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9263, - "step": 56850 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9049, - "step": 56900 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9169, - "step": 56950 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.914, - "step": 57000 - }, - { - "epoch": 0.05, - "eval_loss": 0.545651912689209, - "eval_runtime": 38.4894, - "eval_samples_per_second": 4.183, - "eval_steps_per_second": 0.078, - "eval_wer": 28.410475030450673, - "step": 57000 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.92, - "step": 57050 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9217, - "step": 57100 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9247, - "step": 57150 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9188, - "step": 57200 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9181, - "step": 57250 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9123, - "step": 57300 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9202, - "step": 57350 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9264, - "step": 57400 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.9157, - "step": 57450 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9074, - "step": 57500 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9108, - "step": 57550 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.908, - "step": 57600 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9189, - "step": 57650 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9189, - "step": 57700 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9135, - "step": 57750 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9157, - "step": 57800 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9218, - "step": 57850 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9147, - "step": 57900 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9228, - "step": 57950 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9149, - "step": 58000 - }, - { - "epoch": 0.06, - "eval_loss": 0.5448886752128601, - "eval_runtime": 39.6395, - "eval_samples_per_second": 4.062, - "eval_steps_per_second": 0.076, - "eval_wer": 27.588306942752737, - "step": 58000 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9191, - "step": 58050 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9215, - "step": 58100 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9133, - "step": 58150 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9152, - "step": 58200 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9034, - "step": 58250 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9183, - "step": 58300 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9137, - "step": 58350 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9186, - "step": 58400 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.9166, - "step": 58450 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9153, - "step": 58500 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9159, - "step": 58550 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.921, - "step": 58600 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9109, - "step": 58650 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9079, - "step": 58700 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9129, - "step": 58750 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9141, - "step": 58800 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9191, - "step": 58850 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9078, - "step": 58900 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9067, - "step": 58950 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9092, - "step": 59000 - }, - { - "epoch": 0.07, - "eval_loss": 0.5405263304710388, - "eval_runtime": 39.5678, - "eval_samples_per_second": 4.069, - "eval_steps_per_second": 0.076, - "eval_wer": 27.831912302070645, - "step": 59000 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9125, - "step": 59050 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9044, - "step": 59100 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9086, - "step": 59150 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9177, - "step": 59200 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9152, - "step": 59250 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9193, - "step": 59300 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9148, - "step": 59350 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9, - "step": 59400 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9084, - "step": 59450 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.9054, - "step": 59500 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8991, - "step": 59550 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9028, - "step": 59600 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9068, - "step": 59650 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9252, - "step": 59700 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9156, - "step": 59750 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9235, - "step": 59800 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.903, - "step": 59850 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9125, - "step": 59900 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9124, - "step": 59950 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9101, - "step": 60000 - }, - { - "epoch": 0.08, - "eval_loss": 0.540199339389801, - "eval_runtime": 38.3011, - "eval_samples_per_second": 4.204, - "eval_steps_per_second": 0.078, - "eval_wer": 27.344701583434833, - "step": 60000 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9211, - "step": 60050 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9131, - "step": 60100 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9119, - "step": 60150 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9098, - "step": 60200 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.9096, - "step": 60250 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.9053, - "step": 60300 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.9087, - "step": 60350 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.8995, - "step": 60400 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.8805, - "step": 60450 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.8959, - "step": 60500 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.9045, - "step": 60550 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.9092, - "step": 60600 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.9041, - "step": 60650 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.912, - "step": 60700 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.9056, - "step": 60750 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8947, - "step": 60800 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.9007, - "step": 60850 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8982, - "step": 60900 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.9015, - "step": 60950 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.9046, - "step": 61000 - }, - { - "epoch": 1.01, - "eval_loss": 0.5374026298522949, - "eval_runtime": 40.3306, - "eval_samples_per_second": 3.992, - "eval_steps_per_second": 0.074, - "eval_wer": 27.557856272838006, - "step": 61000 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8987, - "step": 61050 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.9055, - "step": 61100 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.9051, - "step": 61150 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8914, - "step": 61200 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8905, - "step": 61250 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8891, - "step": 61300 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.9029, - "step": 61350 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.9086, - "step": 61400 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.9094, - "step": 61450 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.9161, - "step": 61500 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.9026, - "step": 61550 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8919, - "step": 61600 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8966, - "step": 61650 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8983, - "step": 61700 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8964, - "step": 61750 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8927, - "step": 61800 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.9035, - "step": 61850 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8993, - "step": 61900 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8943, - "step": 61950 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8917, - "step": 62000 - }, - { - "epoch": 1.02, - "eval_loss": 0.5390424132347107, - "eval_runtime": 37.0496, - "eval_samples_per_second": 4.346, - "eval_steps_per_second": 0.081, - "eval_wer": 27.740560292326432, - "step": 62000 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.9055, - "step": 62050 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.9059, - "step": 62100 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.9021, - "step": 62150 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8927, - "step": 62200 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.901, - "step": 62250 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8946, - "step": 62300 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.9117, - "step": 62350 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8972, - "step": 62400 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8969, - "step": 62450 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8873, - "step": 62500 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.9085, - "step": 62550 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.9078, - "step": 62600 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8987, - "step": 62650 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8939, - "step": 62700 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8953, - "step": 62750 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8906, - "step": 62800 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.9033, - "step": 62850 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8981, - "step": 62900 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.9033, - "step": 62950 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8993, - "step": 63000 - }, - { - "epoch": 1.03, - "eval_loss": 0.5385776162147522, - "eval_runtime": 39.1791, - "eval_samples_per_second": 4.109, - "eval_steps_per_second": 0.077, - "eval_wer": 27.40560292326431, - "step": 63000 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.9011, - "step": 63050 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.9103, - "step": 63100 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.9001, - "step": 63150 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8957, - "step": 63200 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.9021, - "step": 63250 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8933, - "step": 63300 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8868, - "step": 63350 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.887, - "step": 63400 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8961, - "step": 63450 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.9008, - "step": 63500 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8922, - "step": 63550 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8862, - "step": 63600 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8865, - "step": 63650 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.9023, - "step": 63700 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8946, - "step": 63750 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8954, - "step": 63800 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8864, - "step": 63850 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8879, - "step": 63900 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8884, - "step": 63950 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8875, - "step": 64000 - }, - { - "epoch": 1.04, - "eval_loss": 0.5361135601997375, - "eval_runtime": 37.6948, - "eval_samples_per_second": 4.271, - "eval_steps_per_second": 0.08, - "eval_wer": 26.857490864799026, - "step": 64000 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.9006, - "step": 64050 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8948, - "step": 64100 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8909, - "step": 64150 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8905, - "step": 64200 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8878, - "step": 64250 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8985, - "step": 64300 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.9024, - "step": 64350 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8999, - "step": 64400 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.9004, - "step": 64450 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8974, - "step": 64500 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8914, - "step": 64550 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.9016, - "step": 64600 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8866, - "step": 64650 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8997, - "step": 64700 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8828, - "step": 64750 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.9024, - "step": 64800 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8879, - "step": 64850 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.907, - "step": 64900 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.9021, - "step": 64950 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8892, - "step": 65000 - }, - { - "epoch": 1.05, - "eval_loss": 0.5358411073684692, - "eval_runtime": 38.731, - "eval_samples_per_second": 4.157, - "eval_steps_per_second": 0.077, - "eval_wer": 27.344701583434833, - "step": 65000 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.9063, - "step": 65050 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8907, - "step": 65100 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8849, - "step": 65150 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8857, - "step": 65200 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8937, - "step": 65250 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8851, - "step": 65300 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8878, - "step": 65350 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.9036, - "step": 65400 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.91, - "step": 65450 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8864, - "step": 65500 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8888, - "step": 65550 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8933, - "step": 65600 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8898, - "step": 65650 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.9006, - "step": 65700 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8957, - "step": 65750 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8887, - "step": 65800 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8967, - "step": 65850 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.895, - "step": 65900 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8955, - "step": 65950 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8929, - "step": 66000 - }, - { - "epoch": 1.06, - "eval_loss": 0.5346468091011047, - "eval_runtime": 38.4533, - "eval_samples_per_second": 4.187, - "eval_steps_per_second": 0.078, - "eval_wer": 26.73568818514007, - "step": 66000 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.8919, - "step": 66050 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.8799, - "step": 66100 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.8854, - "step": 66150 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.884, - "step": 66200 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.8742, - "step": 66250 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.8693, - "step": 66300 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.895, - "step": 66350 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.886, - "step": 66400 - }, - { - "epoch": 0.0, - "learning_rate": 3e-06, - "loss": 0.8899, - "step": 66450 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8952, - "step": 66500 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8939, - "step": 66550 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8787, - "step": 66600 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.881, - "step": 66650 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8903, - "step": 66700 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8749, - "step": 66750 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8794, - "step": 66800 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.899, - "step": 66850 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8862, - "step": 66900 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8834, - "step": 66950 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8703, - "step": 67000 - }, - { - "epoch": 0.01, - "eval_loss": 0.5332406759262085, - "eval_runtime": 38.9701, - "eval_samples_per_second": 4.131, - "eval_steps_per_second": 0.077, - "eval_wer": 26.82704019488429, - "step": 67000 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.891, - "step": 67050 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8921, - "step": 67100 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8917, - "step": 67150 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8824, - "step": 67200 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8735, - "step": 67250 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8824, - "step": 67300 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8919, - "step": 67350 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8903, - "step": 67400 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.8917, - "step": 67450 - }, - { - "epoch": 0.01, - "learning_rate": 3e-06, - "loss": 0.872, - "step": 67500 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.89, - "step": 67550 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8824, - "step": 67600 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8718, - "step": 67650 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8881, - "step": 67700 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8933, - "step": 67750 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8834, - "step": 67800 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.878, - "step": 67850 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8871, - "step": 67900 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8799, - "step": 67950 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8709, - "step": 68000 - }, - { - "epoch": 0.02, - "eval_loss": 0.5335850715637207, - "eval_runtime": 40.527, - "eval_samples_per_second": 3.973, - "eval_steps_per_second": 0.074, - "eval_wer": 26.705237515225335, - "step": 68000 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8754, - "step": 68050 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8951, - "step": 68100 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8765, - "step": 68150 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8698, - "step": 68200 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8834, - "step": 68250 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.891, - "step": 68300 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8845, - "step": 68350 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8918, - "step": 68400 - }, - { - "epoch": 0.02, - "learning_rate": 3e-06, - "loss": 0.8838, - "step": 68450 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8798, - "step": 68500 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8744, - "step": 68550 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.878, - "step": 68600 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8953, - "step": 68650 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8737, - "step": 68700 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8733, - "step": 68750 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8783, - "step": 68800 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8885, - "step": 68850 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8957, - "step": 68900 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8853, - "step": 68950 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8917, - "step": 69000 - }, - { - "epoch": 0.03, - "eval_loss": 0.5328567028045654, - "eval_runtime": 39.7121, - "eval_samples_per_second": 4.054, - "eval_steps_per_second": 0.076, - "eval_wer": 27.070645554202194, - "step": 69000 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8826, - "step": 69050 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8879, - "step": 69100 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8796, - "step": 69150 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8915, - "step": 69200 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8707, - "step": 69250 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8722, - "step": 69300 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8864, - "step": 69350 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.8908, - "step": 69400 - }, - { - "epoch": 0.03, - "learning_rate": 3e-06, - "loss": 0.9004, - "step": 69450 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.89, - "step": 69500 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.885, - "step": 69550 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8716, - "step": 69600 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.876, - "step": 69650 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8831, - "step": 69700 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8746, - "step": 69750 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8718, - "step": 69800 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8868, - "step": 69850 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8855, - "step": 69900 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.884, - "step": 69950 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8867, - "step": 70000 - }, - { - "epoch": 0.04, - "eval_loss": 0.5323489904403687, - "eval_runtime": 38.5858, - "eval_samples_per_second": 4.173, - "eval_steps_per_second": 0.078, - "eval_wer": 26.33982947624848, - "step": 70000 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.892, - "step": 70050 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8939, - "step": 70100 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8801, - "step": 70150 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8955, - "step": 70200 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8804, - "step": 70250 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8877, - "step": 70300 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.887, - "step": 70350 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8989, - "step": 70400 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.8997, - "step": 70450 - }, - { - "epoch": 0.04, - "learning_rate": 3e-06, - "loss": 0.873, - "step": 70500 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8836, - "step": 70550 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8801, - "step": 70600 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8835, - "step": 70650 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8997, - "step": 70700 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8984, - "step": 70750 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8789, - "step": 70800 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8869, - "step": 70850 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8683, - "step": 70900 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8801, - "step": 70950 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8778, - "step": 71000 - }, - { - "epoch": 0.05, - "eval_loss": 0.5315227508544922, - "eval_runtime": 40.5435, - "eval_samples_per_second": 3.971, - "eval_steps_per_second": 0.074, - "eval_wer": 27.28380024360536, - "step": 71000 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8828, - "step": 71050 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8836, - "step": 71100 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8853, - "step": 71150 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8885, - "step": 71200 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8895, - "step": 71250 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.884, - "step": 71300 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8843, - "step": 71350 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8881, - "step": 71400 - }, - { - "epoch": 0.05, - "learning_rate": 3e-06, - "loss": 0.8797, - "step": 71450 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8716, - "step": 71500 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8747, - "step": 71550 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8715, - "step": 71600 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8816, - "step": 71650 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8806, - "step": 71700 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8751, - "step": 71750 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8798, - "step": 71800 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8906, - "step": 71850 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8773, - "step": 71900 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8835, - "step": 71950 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8757, - "step": 72000 - }, - { - "epoch": 0.06, - "eval_loss": 0.5316939353942871, - "eval_runtime": 41.0725, - "eval_samples_per_second": 3.92, - "eval_steps_per_second": 0.073, - "eval_wer": 26.248477466504262, - "step": 72000 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8853, - "step": 72050 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8912, - "step": 72100 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8848, - "step": 72150 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8867, - "step": 72200 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8751, - "step": 72250 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8824, - "step": 72300 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8771, - "step": 72350 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8824, - "step": 72400 - }, - { - "epoch": 0.06, - "learning_rate": 3e-06, - "loss": 0.8848, - "step": 72450 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8821, - "step": 72500 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8774, - "step": 72550 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8841, - "step": 72600 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8749, - "step": 72650 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8716, - "step": 72700 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8805, - "step": 72750 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8838, - "step": 72800 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8822, - "step": 72850 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.872, - "step": 72900 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8699, - "step": 72950 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8726, - "step": 73000 - }, - { - "epoch": 0.07, - "eval_loss": 0.5268862843513489, - "eval_runtime": 38.1185, - "eval_samples_per_second": 4.224, - "eval_steps_per_second": 0.079, - "eval_wer": 26.64433617539586, - "step": 73000 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8761, - "step": 73050 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8675, - "step": 73100 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8775, - "step": 73150 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.889, - "step": 73200 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8847, - "step": 73250 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8843, - "step": 73300 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8765, - "step": 73350 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8638, - "step": 73400 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8724, - "step": 73450 - }, - { - "epoch": 0.07, - "learning_rate": 3e-06, - "loss": 0.8697, - "step": 73500 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8634, - "step": 73550 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8669, - "step": 73600 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8701, - "step": 73650 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8936, - "step": 73700 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8845, - "step": 73750 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8858, - "step": 73800 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8662, - "step": 73850 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8769, - "step": 73900 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8843, - "step": 73950 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8792, - "step": 74000 - }, - { - "epoch": 0.08, - "eval_loss": 0.526807427406311, - "eval_runtime": 38.075, - "eval_samples_per_second": 4.229, - "eval_steps_per_second": 0.079, - "eval_wer": 26.15712545676005, - "step": 74000 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8865, - "step": 74050 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8765, - "step": 74100 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8757, - "step": 74150 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8743, - "step": 74200 - }, - { - "epoch": 0.08, - "learning_rate": 3e-06, - "loss": 0.8726, - "step": 74250 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.869, - "step": 74300 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.8727, - "step": 74350 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.8629, - "step": 74400 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.8445, - "step": 74450 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.8617, - "step": 74500 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.8703, - "step": 74550 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.8754, - "step": 74600 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.8683, - "step": 74650 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.8766, - "step": 74700 - }, - { - "epoch": 1.0, - "learning_rate": 3e-06, - "loss": 0.8712, - "step": 74750 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8589, - "step": 74800 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8652, - "step": 74850 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8646, - "step": 74900 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8675, - "step": 74950 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8706, - "step": 75000 - }, - { - "epoch": 1.01, - "eval_loss": 0.5247220993041992, - "eval_runtime": 39.3966, - "eval_samples_per_second": 4.087, - "eval_steps_per_second": 0.076, - "eval_wer": 26.15712545676005, - "step": 75000 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8645, - "step": 75050 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8716, - "step": 75100 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8705, - "step": 75150 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.858, - "step": 75200 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8568, - "step": 75250 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8549, - "step": 75300 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8668, - "step": 75350 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8727, - "step": 75400 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8738, - "step": 75450 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.88, - "step": 75500 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8669, - "step": 75550 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8576, - "step": 75600 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8612, - "step": 75650 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8623, - "step": 75700 - }, - { - "epoch": 1.01, - "learning_rate": 3e-06, - "loss": 0.8613, - "step": 75750 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8575, - "step": 75800 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8687, - "step": 75850 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8654, - "step": 75900 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8602, - "step": 75950 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8585, - "step": 76000 - }, - { - "epoch": 1.02, - "eval_loss": 0.5264820456504822, - "eval_runtime": 38.9874, - "eval_samples_per_second": 4.13, - "eval_steps_per_second": 0.077, - "eval_wer": 26.370280146163218, - "step": 76000 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8717, - "step": 76050 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8724, - "step": 76100 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8691, - "step": 76150 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8587, - "step": 76200 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8665, - "step": 76250 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8613, - "step": 76300 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8781, - "step": 76350 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8639, - "step": 76400 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8638, - "step": 76450 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8534, - "step": 76500 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8729, - "step": 76550 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8733, - "step": 76600 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.864, - "step": 76650 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8605, - "step": 76700 - }, - { - "epoch": 1.02, - "learning_rate": 3e-06, - "loss": 0.8624, - "step": 76750 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8575, - "step": 76800 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8696, - "step": 76850 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8636, - "step": 76900 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8684, - "step": 76950 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8659, - "step": 77000 - }, - { - "epoch": 1.03, - "eval_loss": 0.5261924862861633, - "eval_runtime": 39.7957, - "eval_samples_per_second": 4.046, - "eval_steps_per_second": 0.075, - "eval_wer": 26.73568818514007, - "step": 77000 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8672, - "step": 77050 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8756, - "step": 77100 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8668, - "step": 77150 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8619, - "step": 77200 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8676, - "step": 77250 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8594, - "step": 77300 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8534, - "step": 77350 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8539, - "step": 77400 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8632, - "step": 77450 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8672, - "step": 77500 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8589, - "step": 77550 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8525, - "step": 77600 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8528, - "step": 77650 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.868, - "step": 77700 - }, - { - "epoch": 1.03, - "learning_rate": 3e-06, - "loss": 0.8608, - "step": 77750 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8622, - "step": 77800 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8536, - "step": 77850 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8548, - "step": 77900 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8552, - "step": 77950 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8551, - "step": 78000 - }, - { - "epoch": 1.04, - "eval_loss": 0.5248793363571167, - "eval_runtime": 38.0291, - "eval_samples_per_second": 4.234, - "eval_steps_per_second": 0.079, - "eval_wer": 26.065773447015832, - "step": 78000 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.868, - "step": 78050 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8623, - "step": 78100 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8586, - "step": 78150 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8573, - "step": 78200 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8553, - "step": 78250 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8659, - "step": 78300 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8696, - "step": 78350 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8676, - "step": 78400 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8669, - "step": 78450 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8631, - "step": 78500 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8586, - "step": 78550 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8691, - "step": 78600 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8537, - "step": 78650 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8671, - "step": 78700 - }, - { - "epoch": 1.04, - "learning_rate": 3e-06, - "loss": 0.8505, - "step": 78750 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8684, - "step": 78800 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8555, - "step": 78850 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8747, - "step": 78900 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8695, - "step": 78950 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8572, - "step": 79000 - }, - { - "epoch": 1.05, - "eval_loss": 0.5249020457267761, - "eval_runtime": 38.8753, - "eval_samples_per_second": 4.141, - "eval_steps_per_second": 0.077, - "eval_wer": 26.278928136419, - "step": 79000 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.874, - "step": 79050 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8587, - "step": 79100 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8527, - "step": 79150 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.853, - "step": 79200 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8614, - "step": 79250 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8535, - "step": 79300 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.856, - "step": 79350 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.871, - "step": 79400 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.877, - "step": 79450 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.854, - "step": 79500 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.856, - "step": 79550 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8606, - "step": 79600 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8567, - "step": 79650 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8673, - "step": 79700 - }, - { - "epoch": 1.05, - "learning_rate": 3e-06, - "loss": 0.8626, - "step": 79750 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8571, - "step": 79800 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8651, - "step": 79850 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8625, - "step": 79900 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8639, - "step": 79950 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8612, - "step": 80000 - }, - { - "epoch": 1.06, - "eval_loss": 0.5234566330909729, - "eval_runtime": 39.62, - "eval_samples_per_second": 4.064, - "eval_steps_per_second": 0.076, - "eval_wer": 25.761266747868454, - "step": 80000 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8742, - "step": 80050 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8658, - "step": 80100 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8611, - "step": 80150 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8718, - "step": 80200 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8697, - "step": 80250 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8722, - "step": 80300 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.873, - "step": 80350 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8662, - "step": 80400 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8656, - "step": 80450 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.87, - "step": 80500 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8658, - "step": 80550 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8703, - "step": 80600 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8718, - "step": 80650 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8589, - "step": 80700 - }, - { - "epoch": 1.06, - "learning_rate": 3e-06, - "loss": 0.8676, - "step": 80750 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8589, - "step": 80800 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8624, - "step": 80850 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8645, - "step": 80900 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8727, - "step": 80950 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8598, - "step": 81000 - }, - { - "epoch": 1.07, - "eval_loss": 0.5208215117454529, - "eval_runtime": 38.4397, - "eval_samples_per_second": 4.188, - "eval_steps_per_second": 0.078, - "eval_wer": 25.700365408038977, - "step": 81000 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.882, - "step": 81050 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8737, - "step": 81100 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8689, - "step": 81150 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8631, - "step": 81200 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8722, - "step": 81250 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8688, - "step": 81300 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8662, - "step": 81350 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8627, - "step": 81400 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8671, - "step": 81450 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8573, - "step": 81500 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.861, - "step": 81550 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.867, - "step": 81600 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8639, - "step": 81650 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8607, - "step": 81700 - }, - { - "epoch": 1.07, - "learning_rate": 3e-06, - "loss": 0.8602, - "step": 81750 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8719, - "step": 81800 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8637, - "step": 81850 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8598, - "step": 81900 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.867, - "step": 81950 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8686, - "step": 82000 - }, - { - "epoch": 1.08, - "eval_loss": 0.5214089155197144, - "eval_runtime": 37.0595, - "eval_samples_per_second": 4.344, - "eval_steps_per_second": 0.081, - "eval_wer": 25.700365408038977, - "step": 82000 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8712, - "step": 82050 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8656, - "step": 82100 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8677, - "step": 82150 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8656, - "step": 82200 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8651, - "step": 82250 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.874, - "step": 82300 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8735, - "step": 82350 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8637, - "step": 82400 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8657, - "step": 82450 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8685, - "step": 82500 - }, - { - "epoch": 1.08, - "learning_rate": 3e-06, - "loss": 0.8534, - "step": 82550 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 0.8645, - "step": 82600 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 0.8448, - "step": 82650 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 0.8565, - "step": 82700 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 0.8537, - "step": 82750 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 0.8496, - "step": 82800 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 0.8512, - "step": 82850 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 0.8483, - "step": 82900 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 0.8476, - "step": 82950 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 0.8503, - "step": 83000 - }, - { - "epoch": 2.0, - "eval_loss": 0.5213810205459595, - "eval_runtime": 38.3305, - "eval_samples_per_second": 4.2, - "eval_steps_per_second": 0.078, - "eval_wer": 25.700365408038977, - "step": 83000 - }, - { - "epoch": 2.0, - "learning_rate": 3e-06, - "loss": 0.85, - "step": 83050 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.848, - "step": 83100 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8498, - "step": 83150 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8523, - "step": 83200 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8537, - "step": 83250 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8542, - "step": 83300 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8519, - "step": 83350 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.857, - "step": 83400 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8636, - "step": 83450 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8408, - "step": 83500 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8688, - "step": 83550 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8539, - "step": 83600 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8406, - "step": 83650 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8532, - "step": 83700 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.86, - "step": 83750 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8591, - "step": 83800 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8469, - "step": 83850 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8537, - "step": 83900 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.848, - "step": 83950 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8545, - "step": 84000 - }, - { - "epoch": 2.01, - "eval_loss": 0.5214766263961792, - "eval_runtime": 53.964, - "eval_samples_per_second": 2.983, - "eval_steps_per_second": 0.056, - "eval_wer": 28.227771010962243, - "step": 84000 - }, - { - "epoch": 2.01, - "learning_rate": 3e-06, - "loss": 0.8418, - "step": 84050 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8581, - "step": 84100 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.851, - "step": 84150 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8384, - "step": 84200 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8519, - "step": 84250 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.859, - "step": 84300 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8476, - "step": 84350 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8543, - "step": 84400 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8462, - "step": 84450 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8383, - "step": 84500 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8431, - "step": 84550 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.852, - "step": 84600 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8607, - "step": 84650 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8469, - "step": 84700 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8391, - "step": 84750 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8567, - "step": 84800 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8409, - "step": 84850 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8509, - "step": 84900 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8606, - "step": 84950 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8594, - "step": 85000 - }, - { - "epoch": 2.02, - "eval_loss": 0.5186431407928467, - "eval_runtime": 39.735, - "eval_samples_per_second": 4.052, - "eval_steps_per_second": 0.076, - "eval_wer": 25.669914738124238, - "step": 85000 - }, - { - "epoch": 2.02, - "learning_rate": 3e-06, - "loss": 0.8663, - "step": 85050 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8503, - "step": 85100 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.848, - "step": 85150 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8507, - "step": 85200 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8559, - "step": 85250 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8536, - "step": 85300 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8562, - "step": 85350 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.843, - "step": 85400 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8628, - "step": 85450 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8418, - "step": 85500 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8493, - "step": 85550 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8544, - "step": 85600 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8529, - "step": 85650 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8573, - "step": 85700 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8541, - "step": 85750 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8428, - "step": 85800 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8432, - "step": 85850 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8402, - "step": 85900 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8465, - "step": 85950 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.86, - "step": 86000 - }, - { - "epoch": 2.03, - "eval_loss": 0.5196462273597717, - "eval_runtime": 40.0681, - "eval_samples_per_second": 4.018, - "eval_steps_per_second": 0.075, - "eval_wer": 25.57856272838002, - "step": 86000 - }, - { - "epoch": 2.03, - "learning_rate": 3e-06, - "loss": 0.8511, - "step": 86050 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.845, - "step": 86100 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8432, - "step": 86150 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8493, - "step": 86200 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8467, - "step": 86250 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8396, - "step": 86300 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8501, - "step": 86350 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8504, - "step": 86400 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8699, - "step": 86450 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8479, - "step": 86500 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8433, - "step": 86550 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8577, - "step": 86600 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8428, - "step": 86650 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8472, - "step": 86700 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8506, - "step": 86750 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8485, - "step": 86800 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8391, - "step": 86850 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8391, - "step": 86900 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8627, - "step": 86950 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8514, - "step": 87000 - }, - { - "epoch": 2.04, - "eval_loss": 0.5203261971473694, - "eval_runtime": 39.1863, - "eval_samples_per_second": 4.109, - "eval_steps_per_second": 0.077, - "eval_wer": 25.182704019488426, - "step": 87000 - }, - { - "epoch": 2.04, - "learning_rate": 3e-06, - "loss": 0.8551, - "step": 87050 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8441, - "step": 87100 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8528, - "step": 87150 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8619, - "step": 87200 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8434, - "step": 87250 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.846, - "step": 87300 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8567, - "step": 87350 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8502, - "step": 87400 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8531, - "step": 87450 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8515, - "step": 87500 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8344, - "step": 87550 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8511, - "step": 87600 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8474, - "step": 87650 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8474, - "step": 87700 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8569, - "step": 87750 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8505, - "step": 87800 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8571, - "step": 87850 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8469, - "step": 87900 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8564, - "step": 87950 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8505, - "step": 88000 - }, - { - "epoch": 2.05, - "eval_loss": 0.5163885354995728, - "eval_runtime": 50.9215, - "eval_samples_per_second": 3.162, - "eval_steps_per_second": 0.059, - "eval_wer": 28.01461632155907, - "step": 88000 - }, - { - "epoch": 2.05, - "learning_rate": 3e-06, - "loss": 0.8538, - "step": 88050 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8592, - "step": 88100 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.851, - "step": 88150 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8455, - "step": 88200 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8412, - "step": 88250 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8507, - "step": 88300 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8462, - "step": 88350 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8452, - "step": 88400 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8406, - "step": 88450 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8348, - "step": 88500 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8376, - "step": 88550 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8336, - "step": 88600 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.848, - "step": 88650 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8507, - "step": 88700 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8512, - "step": 88750 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8441, - "step": 88800 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8459, - "step": 88850 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8484, - "step": 88900 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8389, - "step": 88950 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8512, - "step": 89000 - }, - { - "epoch": 2.06, - "eval_loss": 0.5173874497413635, - "eval_runtime": 39.69, - "eval_samples_per_second": 4.056, - "eval_steps_per_second": 0.076, - "eval_wer": 25.091352009744217, - "step": 89000 - }, - { - "epoch": 2.06, - "learning_rate": 3e-06, - "loss": 0.8477, - "step": 89050 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8642, - "step": 89100 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8497, - "step": 89150 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8489, - "step": 89200 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8383, - "step": 89250 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8441, - "step": 89300 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8523, - "step": 89350 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.836, - "step": 89400 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8397, - "step": 89450 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8432, - "step": 89500 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8383, - "step": 89550 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8475, - "step": 89600 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8469, - "step": 89650 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8537, - "step": 89700 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8516, - "step": 89750 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8452, - "step": 89800 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.847, - "step": 89850 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8373, - "step": 89900 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8446, - "step": 89950 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8495, - "step": 90000 - }, - { - "epoch": 2.07, - "eval_loss": 0.5141222476959229, - "eval_runtime": 38.4488, - "eval_samples_per_second": 4.187, - "eval_steps_per_second": 0.078, - "eval_wer": 25.54811205846529, - "step": 90000 - }, - { - "epoch": 2.07, - "learning_rate": 3e-06, - "loss": 0.8508, - "step": 90050 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8408, - "step": 90100 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8439, - "step": 90150 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8476, - "step": 90200 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8434, - "step": 90250 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8535, - "step": 90300 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8414, - "step": 90350 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.853, - "step": 90400 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8436, - "step": 90450 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8366, - "step": 90500 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8387, - "step": 90550 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8421, - "step": 90600 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8371, - "step": 90650 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8542, - "step": 90700 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8543, - "step": 90750 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8352, - "step": 90800 - }, - { - "epoch": 2.08, - "learning_rate": 3e-06, - "loss": 0.8383, - "step": 90850 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 0.841, - "step": 90900 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 0.839, - "step": 90950 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 0.8381, - "step": 91000 - }, - { - "epoch": 3.0, - "eval_loss": 0.5129591226577759, - "eval_runtime": 38.5934, - "eval_samples_per_second": 4.172, - "eval_steps_per_second": 0.078, - "eval_wer": 24.96954933008526, - "step": 91000 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 0.828, - "step": 91050 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 0.8258, - "step": 91100 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 0.8379, - "step": 91150 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 0.8315, - "step": 91200 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 0.8401, - "step": 91250 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 0.8444, - "step": 91300 - }, - { - "epoch": 3.0, - "learning_rate": 3e-06, - "loss": 0.843, - "step": 91350 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8325, - "step": 91400 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8416, - "step": 91450 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8468, - "step": 91500 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8376, - "step": 91550 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8431, - "step": 91600 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8307, - "step": 91650 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.83, - "step": 91700 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8404, - "step": 91750 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8429, - "step": 91800 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8328, - "step": 91850 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.83, - "step": 91900 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8256, - "step": 91950 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8253, - "step": 92000 - }, - { - "epoch": 3.01, - "eval_loss": 0.5146694779396057, - "eval_runtime": 35.1807, - "eval_samples_per_second": 4.576, - "eval_steps_per_second": 0.085, - "eval_wer": 25.57856272838002, - "step": 92000 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8357, - "step": 92050 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8417, - "step": 92100 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8426, - "step": 92150 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8331, - "step": 92200 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8215, - "step": 92250 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8274, - "step": 92300 - }, - { - "epoch": 3.01, - "learning_rate": 3e-06, - "loss": 0.8268, - "step": 92350 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8322, - "step": 92400 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.84, - "step": 92450 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8407, - "step": 92500 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8404, - "step": 92550 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8395, - "step": 92600 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8346, - "step": 92650 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8292, - "step": 92700 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8389, - "step": 92750 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8261, - "step": 92800 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8261, - "step": 92850 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8343, - "step": 92900 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8294, - "step": 92950 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8387, - "step": 93000 - }, - { - "epoch": 3.02, - "eval_loss": 0.5168384313583374, - "eval_runtime": 38.8899, - "eval_samples_per_second": 4.14, - "eval_steps_per_second": 0.077, - "eval_wer": 24.908647990255787, - "step": 93000 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8296, - "step": 93050 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8306, - "step": 93100 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8364, - "step": 93150 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8453, - "step": 93200 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8191, - "step": 93250 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8257, - "step": 93300 - }, - { - "epoch": 3.02, - "learning_rate": 3e-06, - "loss": 0.8376, - "step": 93350 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8242, - "step": 93400 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8238, - "step": 93450 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8319, - "step": 93500 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8355, - "step": 93550 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8372, - "step": 93600 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8326, - "step": 93650 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8339, - "step": 93700 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.838, - "step": 93750 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8365, - "step": 93800 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8359, - "step": 93850 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8385, - "step": 93900 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8357, - "step": 93950 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8425, - "step": 94000 - }, - { - "epoch": 3.03, - "eval_loss": 0.5134768486022949, - "eval_runtime": 37.6084, - "eval_samples_per_second": 4.281, - "eval_steps_per_second": 0.08, - "eval_wer": 25.243605359317904, - "step": 94000 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8331, - "step": 94050 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8438, - "step": 94100 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8389, - "step": 94150 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8274, - "step": 94200 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8254, - "step": 94250 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8322, - "step": 94300 - }, - { - "epoch": 3.03, - "learning_rate": 3e-06, - "loss": 0.8471, - "step": 94350 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8358, - "step": 94400 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8337, - "step": 94450 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8381, - "step": 94500 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8384, - "step": 94550 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8286, - "step": 94600 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8332, - "step": 94650 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8362, - "step": 94700 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8306, - "step": 94750 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8367, - "step": 94800 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8349, - "step": 94850 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8286, - "step": 94900 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8249, - "step": 94950 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8339, - "step": 95000 - }, - { - "epoch": 3.04, - "eval_loss": 0.5161515474319458, - "eval_runtime": 40.2582, - "eval_samples_per_second": 3.999, - "eval_steps_per_second": 0.075, - "eval_wer": 25.669914738124238, - "step": 95000 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8428, - "step": 95050 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8315, - "step": 95100 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8442, - "step": 95150 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8305, - "step": 95200 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8305, - "step": 95250 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8485, - "step": 95300 - }, - { - "epoch": 3.04, - "learning_rate": 3e-06, - "loss": 0.8374, - "step": 95350 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8432, - "step": 95400 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8406, - "step": 95450 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8399, - "step": 95500 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8377, - "step": 95550 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8297, - "step": 95600 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8499, - "step": 95650 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8362, - "step": 95700 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8404, - "step": 95750 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8365, - "step": 95800 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8375, - "step": 95850 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.832, - "step": 95900 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8343, - "step": 95950 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8402, - "step": 96000 - }, - { - "epoch": 3.05, - "eval_loss": 0.5146769881248474, - "eval_runtime": 36.7672, - "eval_samples_per_second": 4.379, - "eval_steps_per_second": 0.082, - "eval_wer": 25.730816077953715, - "step": 96000 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8398, - "step": 96050 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8369, - "step": 96100 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8496, - "step": 96150 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8433, - "step": 96200 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8453, - "step": 96250 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8381, - "step": 96300 - }, - { - "epoch": 3.05, - "learning_rate": 3e-06, - "loss": 0.8324, - "step": 96350 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8411, - "step": 96400 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8311, - "step": 96450 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8378, - "step": 96500 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8364, - "step": 96550 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8361, - "step": 96600 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8368, - "step": 96650 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8367, - "step": 96700 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.833, - "step": 96750 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8383, - "step": 96800 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8352, - "step": 96850 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8421, - "step": 96900 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8359, - "step": 96950 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8396, - "step": 97000 - }, - { - "epoch": 3.06, - "eval_loss": 0.5142699480056763, - "eval_runtime": 37.3451, - "eval_samples_per_second": 4.311, - "eval_steps_per_second": 0.08, - "eval_wer": 25.669914738124238, - "step": 97000 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8277, - "step": 97050 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8331, - "step": 97100 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.825, - "step": 97150 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8222, - "step": 97200 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8335, - "step": 97250 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8344, - "step": 97300 - }, - { - "epoch": 3.06, - "learning_rate": 3e-06, - "loss": 0.8327, - "step": 97350 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8256, - "step": 97400 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8365, - "step": 97450 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8245, - "step": 97500 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8234, - "step": 97550 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8386, - "step": 97600 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8338, - "step": 97650 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8403, - "step": 97700 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8465, - "step": 97750 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.836, - "step": 97800 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8236, - "step": 97850 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8387, - "step": 97900 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8309, - "step": 97950 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8432, - "step": 98000 - }, - { - "epoch": 3.07, - "eval_loss": 0.5099794864654541, - "eval_runtime": 38.2718, - "eval_samples_per_second": 4.207, - "eval_steps_per_second": 0.078, - "eval_wer": 24.878197320341048, - "step": 98000 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8324, - "step": 98050 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8328, - "step": 98100 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8281, - "step": 98150 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8282, - "step": 98200 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8376, - "step": 98250 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8453, - "step": 98300 - }, - { - "epoch": 3.07, - "learning_rate": 3e-06, - "loss": 0.8347, - "step": 98350 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.828, - "step": 98400 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.8273, - "step": 98450 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.8421, - "step": 98500 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.8237, - "step": 98550 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.8207, - "step": 98600 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.8481, - "step": 98650 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.8414, - "step": 98700 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.8472, - "step": 98750 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.8257, - "step": 98800 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.8283, - "step": 98850 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.8336, - "step": 98900 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.832, - "step": 98950 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.844, - "step": 99000 - }, - { - "epoch": 3.08, - "eval_loss": 0.5100468397140503, - "eval_runtime": 36.7593, - "eval_samples_per_second": 4.38, - "eval_steps_per_second": 0.082, - "eval_wer": 25.060901339829478, - "step": 99000 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.839, - "step": 99050 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.838, - "step": 99100 - }, - { - "epoch": 3.08, - "learning_rate": 3e-06, - "loss": 0.8347, - "step": 99150 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.8243, - "step": 99200 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.8305, - "step": 99250 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.8251, - "step": 99300 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.8218, - "step": 99350 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.8176, - "step": 99400 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.8345, - "step": 99450 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.8259, - "step": 99500 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.8298, - "step": 99550 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.8183, - "step": 99600 - }, - { - "epoch": 4.0, - "learning_rate": 3e-06, - "loss": 0.83, - "step": 99650 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.8267, - "step": 99700 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.8235, - "step": 99750 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.8223, - "step": 99800 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.8331, - "step": 99850 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.82, - "step": 99900 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.8196, - "step": 99950 - }, - { - "epoch": 4.01, - "learning_rate": 3e-06, - "loss": 0.8333, - "step": 100000 - }, - { - "epoch": 4.01, - "eval_loss": 0.512780487537384, - "eval_runtime": 38.9453, - "eval_samples_per_second": 4.134, - "eval_steps_per_second": 0.077, - "eval_wer": 24.96954933008526, - "step": 100000 - }, - { - "epoch": 4.01, - "step": 100000, - "total_flos": 3.151209909694464e+20, - "train_loss": 0.2913371723175049, - "train_runtime": 623384.3788, - "train_samples_per_second": 41.066, - "train_steps_per_second": 0.16 + "epoch": 1.0, + "step": 2000, + "total_flos": 6.30243459072e+18, + "train_loss": 1.7388742218017579, + "train_runtime": 29801.9401, + "train_samples_per_second": 8.59, + "train_steps_per_second": 0.067 } ], - "max_steps": 100000, + "max_steps": 2000, "num_train_epochs": 9223372036854775807, - "total_flos": 3.151209909694464e+20, + "total_flos": 6.30243459072e+18, "trial_name": null, "trial_params": null }