diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,12925 @@ +{ + "best_metric": 24.878197320341048, + "best_model_checkpoint": "./checkpoint-98000", + "epoch": 4.00848, + "global_step": 100000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.5000000000000002e-07, + "loss": 2.936, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 3.0000000000000004e-07, + "loss": 2.8713, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 4.5e-07, + "loss": 2.7643, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 6.000000000000001e-07, + "loss": 2.6478, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 7.5e-07, + "loss": 2.5266, + "step": 250 + }, + { + "epoch": 0.0, + "learning_rate": 9e-07, + "loss": 2.4391, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 1.05e-06, + "loss": 2.3618, + "step": 350 + }, + { + "epoch": 0.0, + "learning_rate": 1.2000000000000002e-06, + "loss": 2.2763, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 1.35e-06, + "loss": 2.2461, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 1.5e-06, + "loss": 2.1815, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 1.65e-06, + "loss": 2.1294, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 1.8e-06, + "loss": 2.0958, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 1.95e-06, + "loss": 2.0656, + "step": 650 + }, + { + "epoch": 0.01, + "learning_rate": 2.1e-06, + "loss": 2.0392, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 2.25e-06, + "loss": 2.0037, + "step": 750 + }, + { + "epoch": 0.01, + "learning_rate": 2.4000000000000003e-06, + "loss": 1.9791, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 2.55e-06, + "loss": 1.9636, + "step": 850 + }, + { + "epoch": 0.01, + "learning_rate": 2.7e-06, + "loss": 1.9405, + "step": 900 + }, + { + "epoch": 0.01, + "learning_rate": 2.85e-06, + "loss": 1.9216, + "step": 950 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.8819, + "step": 1000 + }, + { + "epoch": 0.01, + "eval_loss": 1.1868911981582642, + "eval_runtime": 54.8495, + "eval_samples_per_second": 2.935, + "eval_steps_per_second": 0.055, + "eval_wer": 61.96711327649208, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.8776, + "step": 1050 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.8508, + "step": 1100 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.8361, + "step": 1150 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.8191, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.7992, + "step": 1250 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.7928, + "step": 1300 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.7808, + "step": 1350 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.7558, + "step": 1400 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.7655, + "step": 1450 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.7327, + "step": 1500 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.7395, + "step": 1550 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.7301, + "step": 1600 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.7016, + "step": 1650 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6962, + "step": 1700 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6992, + "step": 1750 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6888, + "step": 1800 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6646, + "step": 1850 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6713, + "step": 1900 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6562, + "step": 1950 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6425, + "step": 2000 + }, + { + "epoch": 0.02, + "eval_loss": 0.9990558624267578, + "eval_runtime": 38.5229, + "eval_samples_per_second": 4.179, + "eval_steps_per_second": 0.078, + "eval_wer": 53.65408038976858, + "step": 2000 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6376, + "step": 2050 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6457, + "step": 2100 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6244, + "step": 2150 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6113, + "step": 2200 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6096, + "step": 2250 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6302, + "step": 2300 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6078, + "step": 2350 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.6064, + "step": 2400 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.5884, + "step": 2450 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5819, + "step": 2500 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5784, + "step": 2550 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5648, + "step": 2600 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5758, + "step": 2650 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5627, + "step": 2700 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5573, + "step": 2750 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5594, + "step": 2800 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.55, + "step": 2850 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5583, + "step": 2900 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5416, + "step": 2950 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.548, + "step": 3000 + }, + { + "epoch": 0.03, + "eval_loss": 0.9147223234176636, + "eval_runtime": 37.9927, + "eval_samples_per_second": 4.238, + "eval_steps_per_second": 0.079, + "eval_wer": 50.21315468940317, + "step": 3000 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5347, + "step": 3050 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5435, + "step": 3100 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5246, + "step": 3150 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5326, + "step": 3200 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5094, + "step": 3250 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5061, + "step": 3300 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5139, + "step": 3350 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5055, + "step": 3400 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.5109, + "step": 3450 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4999, + "step": 3500 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.5015, + "step": 3550 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4853, + "step": 3600 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4836, + "step": 3650 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4874, + "step": 3700 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4758, + "step": 3750 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.462, + "step": 3800 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4813, + "step": 3850 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4836, + "step": 3900 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4692, + "step": 3950 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4636, + "step": 4000 + }, + { + "epoch": 0.04, + "eval_loss": 0.8605496287345886, + "eval_runtime": 40.5349, + "eval_samples_per_second": 3.972, + "eval_steps_per_second": 0.074, + "eval_wer": 47.07673568818514, + "step": 4000 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4715, + "step": 4050 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.477, + "step": 4100 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4539, + "step": 4150 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4555, + "step": 4200 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4423, + "step": 4250 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4506, + "step": 4300 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4404, + "step": 4350 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4455, + "step": 4400 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4463, + "step": 4450 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.4324, + "step": 4500 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.4317, + "step": 4550 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.4169, + "step": 4600 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.4282, + "step": 4650 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.4487, + "step": 4700 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.4348, + "step": 4750 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.4173, + "step": 4800 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.4247, + "step": 4850 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.4088, + "step": 4900 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.4198, + "step": 4950 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.4113, + "step": 5000 + }, + { + "epoch": 0.05, + "eval_loss": 0.8252834677696228, + "eval_runtime": 38.0695, + "eval_samples_per_second": 4.229, + "eval_steps_per_second": 0.079, + "eval_wer": 45.73690621193666, + "step": 5000 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 1.4062, + "step": 5050 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 1.3963, + "step": 5100 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 1.4073, + "step": 5150 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 1.3978, + "step": 5200 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 1.3838, + "step": 5250 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 1.3836, + "step": 5300 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 1.3974, + "step": 5350 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 1.3793, + "step": 5400 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 1.4009, + "step": 5450 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3867, + "step": 5500 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3756, + "step": 5550 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3735, + "step": 5600 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3735, + "step": 5650 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3719, + "step": 5700 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3661, + "step": 5750 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3646, + "step": 5800 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.373, + "step": 5850 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3669, + "step": 5900 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3658, + "step": 5950 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3484, + "step": 6000 + }, + { + "epoch": 0.01, + "eval_loss": 0.7946101427078247, + "eval_runtime": 39.2317, + "eval_samples_per_second": 4.104, + "eval_steps_per_second": 0.076, + "eval_wer": 43.4531059683313, + "step": 6000 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3619, + "step": 6050 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.355, + "step": 6100 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3529, + "step": 6150 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3469, + "step": 6200 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3411, + "step": 6250 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3491, + "step": 6300 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.352, + "step": 6350 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3355, + "step": 6400 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3545, + "step": 6450 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 1.3312, + "step": 6500 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3494, + "step": 6550 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3443, + "step": 6600 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3241, + "step": 6650 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3295, + "step": 6700 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3391, + "step": 6750 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3323, + "step": 6800 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3168, + "step": 6850 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3301, + "step": 6900 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3225, + "step": 6950 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3127, + "step": 7000 + }, + { + "epoch": 0.02, + "eval_loss": 0.7740240097045898, + "eval_runtime": 39.5487, + "eval_samples_per_second": 4.071, + "eval_steps_per_second": 0.076, + "eval_wer": 42.265529841656516, + "step": 7000 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.314, + "step": 7050 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3271, + "step": 7100 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3121, + "step": 7150 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3037, + "step": 7200 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3075, + "step": 7250 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.328, + "step": 7300 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3154, + "step": 7350 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3151, + "step": 7400 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 1.3032, + "step": 7450 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2986, + "step": 7500 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2994, + "step": 7550 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2927, + "step": 7600 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.305, + "step": 7650 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2958, + "step": 7700 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2945, + "step": 7750 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2989, + "step": 7800 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2951, + "step": 7850 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.305, + "step": 7900 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2923, + "step": 7950 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2994, + "step": 8000 + }, + { + "epoch": 0.03, + "eval_loss": 0.7550716996192932, + "eval_runtime": 39.5194, + "eval_samples_per_second": 4.074, + "eval_steps_per_second": 0.076, + "eval_wer": 40.895249695493305, + "step": 8000 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2896, + "step": 8050 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.3027, + "step": 8100 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2868, + "step": 8150 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2963, + "step": 8200 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2774, + "step": 8250 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2754, + "step": 8300 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2844, + "step": 8350 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2806, + "step": 8400 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 1.2889, + "step": 8450 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2804, + "step": 8500 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2835, + "step": 8550 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2706, + "step": 8600 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2696, + "step": 8650 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2776, + "step": 8700 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2686, + "step": 8750 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2566, + "step": 8800 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2762, + "step": 8850 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2805, + "step": 8900 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2679, + "step": 8950 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.265, + "step": 9000 + }, + { + "epoch": 0.04, + "eval_loss": 0.737849235534668, + "eval_runtime": 40.6621, + "eval_samples_per_second": 3.959, + "eval_steps_per_second": 0.074, + "eval_wer": 39.859926918392205, + "step": 9000 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2748, + "step": 9050 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2816, + "step": 9100 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2618, + "step": 9150 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2648, + "step": 9200 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2532, + "step": 9250 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2633, + "step": 9300 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2545, + "step": 9350 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2625, + "step": 9400 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2641, + "step": 9450 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 1.2514, + "step": 9500 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2529, + "step": 9550 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2397, + "step": 9600 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2534, + "step": 9650 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.273, + "step": 9700 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.263, + "step": 9750 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2452, + "step": 9800 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2544, + "step": 9850 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2393, + "step": 9900 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2516, + "step": 9950 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2458, + "step": 10000 + }, + { + "epoch": 0.05, + "eval_loss": 0.7256616353988647, + "eval_runtime": 39.9578, + "eval_samples_per_second": 4.029, + "eval_steps_per_second": 0.075, + "eval_wer": 39.89037758830694, + "step": 10000 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2579, + "step": 10050 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2544, + "step": 10100 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2491, + "step": 10150 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2535, + "step": 10200 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2515, + "step": 10250 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2431, + "step": 10300 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2503, + "step": 10350 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.256, + "step": 10400 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 1.2445, + "step": 10450 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2336, + "step": 10500 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.237, + "step": 10550 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.235, + "step": 10600 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2435, + "step": 10650 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2456, + "step": 10700 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2269, + "step": 10750 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2289, + "step": 10800 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2379, + "step": 10850 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2261, + "step": 10900 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2363, + "step": 10950 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2257, + "step": 11000 + }, + { + "epoch": 0.06, + "eval_loss": 0.7114033102989197, + "eval_runtime": 39.5013, + "eval_samples_per_second": 4.076, + "eval_steps_per_second": 0.076, + "eval_wer": 39.79902557856273, + "step": 11000 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2315, + "step": 11050 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.229, + "step": 11100 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2291, + "step": 11150 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2311, + "step": 11200 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2186, + "step": 11250 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2326, + "step": 11300 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2277, + "step": 11350 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2311, + "step": 11400 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 1.2213, + "step": 11450 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2163, + "step": 11500 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2171, + "step": 11550 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2288, + "step": 11600 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2184, + "step": 11650 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2118, + "step": 11700 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2137, + "step": 11750 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.211, + "step": 11800 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2264, + "step": 11850 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2133, + "step": 11900 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2104, + "step": 11950 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2126, + "step": 12000 + }, + { + "epoch": 0.07, + "eval_loss": 0.6972322463989258, + "eval_runtime": 40.3496, + "eval_samples_per_second": 3.99, + "eval_steps_per_second": 0.074, + "eval_wer": 37.880633373934224, + "step": 12000 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2101, + "step": 12050 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2027, + "step": 12100 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.207, + "step": 12150 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2193, + "step": 12200 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2064, + "step": 12250 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2109, + "step": 12300 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2048, + "step": 12350 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.195, + "step": 12400 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.2037, + "step": 12450 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 1.1998, + "step": 12500 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.1935, + "step": 12550 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.1955, + "step": 12600 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.1994, + "step": 12650 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.2118, + "step": 12700 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.2017, + "step": 12750 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.2088, + "step": 12800 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.1909, + "step": 12850 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.2045, + "step": 12900 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.2031, + "step": 12950 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.1971, + "step": 13000 + }, + { + "epoch": 0.08, + "eval_loss": 0.6871449947357178, + "eval_runtime": 39.2495, + "eval_samples_per_second": 4.102, + "eval_steps_per_second": 0.076, + "eval_wer": 37.3020706455542, + "step": 13000 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.2081, + "step": 13050 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.1995, + "step": 13100 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.1953, + "step": 13150 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.1944, + "step": 13200 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 1.1872, + "step": 13250 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 1.177, + "step": 13300 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 1.1792, + "step": 13350 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 1.1717, + "step": 13400 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 1.1523, + "step": 13450 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 1.1751, + "step": 13500 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 1.1815, + "step": 13550 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 1.183, + "step": 13600 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 1.17, + "step": 13650 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 1.1812, + "step": 13700 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 1.1798, + "step": 13750 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1626, + "step": 13800 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1687, + "step": 13850 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1676, + "step": 13900 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1748, + "step": 13950 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1786, + "step": 14000 + }, + { + "epoch": 1.01, + "eval_loss": 0.6785603761672974, + "eval_runtime": 39.6356, + "eval_samples_per_second": 4.062, + "eval_steps_per_second": 0.076, + "eval_wer": 37.42387332521315, + "step": 14000 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.172, + "step": 14050 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1752, + "step": 14100 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1765, + "step": 14150 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1586, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1578, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1555, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1659, + "step": 14350 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1701, + "step": 14400 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1687, + "step": 14450 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1804, + "step": 14500 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1636, + "step": 14550 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1559, + "step": 14600 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1555, + "step": 14650 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1563, + "step": 14700 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 1.1508, + "step": 14750 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1454, + "step": 14800 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1555, + "step": 14850 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1627, + "step": 14900 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1578, + "step": 14950 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1486, + "step": 15000 + }, + { + "epoch": 1.02, + "eval_loss": 0.6702972054481506, + "eval_runtime": 43.4585, + "eval_samples_per_second": 3.705, + "eval_steps_per_second": 0.069, + "eval_wer": 36.99756394640682, + "step": 15000 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1655, + "step": 15050 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1649, + "step": 15100 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1609, + "step": 15150 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1516, + "step": 15200 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1635, + "step": 15250 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1528, + "step": 15300 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1688, + "step": 15350 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1508, + "step": 15400 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1511, + "step": 15450 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1354, + "step": 15500 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1562, + "step": 15550 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1523, + "step": 15600 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1469, + "step": 15650 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1458, + "step": 15700 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 1.1455, + "step": 15750 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1444, + "step": 15800 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1582, + "step": 15850 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1447, + "step": 15900 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1494, + "step": 15950 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1505, + "step": 16000 + }, + { + "epoch": 1.03, + "eval_loss": 0.6647058129310608, + "eval_runtime": 39.1012, + "eval_samples_per_second": 4.118, + "eval_steps_per_second": 0.077, + "eval_wer": 36.35809987819732, + "step": 16000 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.148, + "step": 16050 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.151, + "step": 16100 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.149, + "step": 16150 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1413, + "step": 16200 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1398, + "step": 16250 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1406, + "step": 16300 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1335, + "step": 16350 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1326, + "step": 16400 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1425, + "step": 16450 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.148, + "step": 16500 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1368, + "step": 16550 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1308, + "step": 16600 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1298, + "step": 16650 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1352, + "step": 16700 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 1.1327, + "step": 16750 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1409, + "step": 16800 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1263, + "step": 16850 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1304, + "step": 16900 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1298, + "step": 16950 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1238, + "step": 17000 + }, + { + "epoch": 1.04, + "eval_loss": 0.6559091210365295, + "eval_runtime": 39.2532, + "eval_samples_per_second": 4.102, + "eval_steps_per_second": 0.076, + "eval_wer": 36.38855054811206, + "step": 17000 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1387, + "step": 17050 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1357, + "step": 17100 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1308, + "step": 17150 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1327, + "step": 17200 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1278, + "step": 17250 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1384, + "step": 17300 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1433, + "step": 17350 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1354, + "step": 17400 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1356, + "step": 17450 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1271, + "step": 17500 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1263, + "step": 17550 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1367, + "step": 17600 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1222, + "step": 17650 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1357, + "step": 17700 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 1.1127, + "step": 17750 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1303, + "step": 17800 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1174, + "step": 17850 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1407, + "step": 17900 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1321, + "step": 17950 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1184, + "step": 18000 + }, + { + "epoch": 1.05, + "eval_loss": 0.6509166955947876, + "eval_runtime": 39.722, + "eval_samples_per_second": 4.053, + "eval_steps_per_second": 0.076, + "eval_wer": 36.510353227771006, + "step": 18000 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1373, + "step": 18050 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1208, + "step": 18100 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1138, + "step": 18150 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1173, + "step": 18200 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1235, + "step": 18250 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1106, + "step": 18300 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1157, + "step": 18350 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1321, + "step": 18400 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1391, + "step": 18450 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.112, + "step": 18500 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1163, + "step": 18550 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1226, + "step": 18600 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1053, + "step": 18650 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1184, + "step": 18700 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 1.1134, + "step": 18750 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1112, + "step": 18800 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1194, + "step": 18850 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1184, + "step": 18900 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1182, + "step": 18950 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.115, + "step": 19000 + }, + { + "epoch": 1.06, + "eval_loss": 0.6451593041419983, + "eval_runtime": 41.32, + "eval_samples_per_second": 3.896, + "eval_steps_per_second": 0.073, + "eval_wer": 35.99269183922046, + "step": 19000 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.121, + "step": 19050 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1133, + "step": 19100 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1041, + "step": 19150 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1116, + "step": 19200 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.112, + "step": 19250 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.112, + "step": 19300 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1122, + "step": 19350 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1059, + "step": 19400 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1087, + "step": 19450 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1146, + "step": 19500 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1068, + "step": 19550 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1058, + "step": 19600 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.112, + "step": 19650 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.0969, + "step": 19700 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 1.1134, + "step": 19750 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.0999, + "step": 19800 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.1006, + "step": 19850 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.1019, + "step": 19900 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.1105, + "step": 19950 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.1013, + "step": 20000 + }, + { + "epoch": 1.07, + "eval_loss": 0.6382384896278381, + "eval_runtime": 40.3978, + "eval_samples_per_second": 3.985, + "eval_steps_per_second": 0.074, + "eval_wer": 34.50060901339829, + "step": 20000 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.121, + "step": 20050 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.1122, + "step": 20100 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.1039, + "step": 20150 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.1013, + "step": 20200 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.1097, + "step": 20250 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.1002, + "step": 20300 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.0965, + "step": 20350 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.0903, + "step": 20400 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.1016, + "step": 20450 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.0923, + "step": 20500 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.0983, + "step": 20550 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.1011, + "step": 20600 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.0963, + "step": 20650 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.0952, + "step": 20700 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 1.0972, + "step": 20750 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.1087, + "step": 20800 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.0958, + "step": 20850 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.0902, + "step": 20900 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.0958, + "step": 20950 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.0969, + "step": 21000 + }, + { + "epoch": 1.08, + "eval_loss": 0.633127748966217, + "eval_runtime": 40.5043, + "eval_samples_per_second": 3.975, + "eval_steps_per_second": 0.074, + "eval_wer": 34.348355663824606, + "step": 21000 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.0988, + "step": 21050 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.0947, + "step": 21100 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.0964, + "step": 21150 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.0977, + "step": 21200 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.0986, + "step": 21250 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.1004, + "step": 21300 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.0951, + "step": 21350 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.086, + "step": 21400 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.0905, + "step": 21450 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.0967, + "step": 21500 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 1.079, + "step": 21550 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 1.0978, + "step": 21600 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 1.0779, + "step": 21650 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 1.0855, + "step": 21700 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 1.0775, + "step": 21750 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 1.068, + "step": 21800 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 1.0789, + "step": 21850 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 1.0748, + "step": 21900 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 1.075, + "step": 21950 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 1.0784, + "step": 22000 + }, + { + "epoch": 2.0, + "eval_loss": 0.6303825974464417, + "eval_runtime": 38.4993, + "eval_samples_per_second": 4.182, + "eval_steps_per_second": 0.078, + "eval_wer": 34.28745432399513, + "step": 22000 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 1.0746, + "step": 22050 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0761, + "step": 22100 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0793, + "step": 22150 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0814, + "step": 22200 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0801, + "step": 22250 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0791, + "step": 22300 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0718, + "step": 22350 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0855, + "step": 22400 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0949, + "step": 22450 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0627, + "step": 22500 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0873, + "step": 22550 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0698, + "step": 22600 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0667, + "step": 22650 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0735, + "step": 22700 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0821, + "step": 22750 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0794, + "step": 22800 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0646, + "step": 22850 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0684, + "step": 22900 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0673, + "step": 22950 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.0774, + "step": 23000 + }, + { + "epoch": 2.01, + "eval_loss": 0.6248754262924194, + "eval_runtime": 38.6299, + "eval_samples_per_second": 4.168, + "eval_steps_per_second": 0.078, + "eval_wer": 34.104750304506695, + "step": 23000 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 1.065, + "step": 23050 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0757, + "step": 23100 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0676, + "step": 23150 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0604, + "step": 23200 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.071, + "step": 23250 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0778, + "step": 23300 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.063, + "step": 23350 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0705, + "step": 23400 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0639, + "step": 23450 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.057, + "step": 23500 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0617, + "step": 23550 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0623, + "step": 23600 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0778, + "step": 23650 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0687, + "step": 23700 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0626, + "step": 23750 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0781, + "step": 23800 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0579, + "step": 23850 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0624, + "step": 23900 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0719, + "step": 23950 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0719, + "step": 24000 + }, + { + "epoch": 2.02, + "eval_loss": 0.6194329857826233, + "eval_runtime": 39.0738, + "eval_samples_per_second": 4.12, + "eval_steps_per_second": 0.077, + "eval_wer": 33.830694275274055, + "step": 24000 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 1.0779, + "step": 24050 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0651, + "step": 24100 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0654, + "step": 24150 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0647, + "step": 24200 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0693, + "step": 24250 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0682, + "step": 24300 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0733, + "step": 24350 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0597, + "step": 24400 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0737, + "step": 24450 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0547, + "step": 24500 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.063, + "step": 24550 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0582, + "step": 24600 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.06, + "step": 24650 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0713, + "step": 24700 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0699, + "step": 24750 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0552, + "step": 24800 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0566, + "step": 24850 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.048, + "step": 24900 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0529, + "step": 24950 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0638, + "step": 25000 + }, + { + "epoch": 2.03, + "eval_loss": 0.6157576441764832, + "eval_runtime": 39.7564, + "eval_samples_per_second": 4.05, + "eval_steps_per_second": 0.075, + "eval_wer": 32.97807551766139, + "step": 25000 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 1.0555, + "step": 25050 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0485, + "step": 25100 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0554, + "step": 25150 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.061, + "step": 25200 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0582, + "step": 25250 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0426, + "step": 25300 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0551, + "step": 25350 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0577, + "step": 25400 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0811, + "step": 25450 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0541, + "step": 25500 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0446, + "step": 25550 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0642, + "step": 25600 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0492, + "step": 25650 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0469, + "step": 25700 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.052, + "step": 25750 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0534, + "step": 25800 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0457, + "step": 25850 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0459, + "step": 25900 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0684, + "step": 25950 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0592, + "step": 26000 + }, + { + "epoch": 2.04, + "eval_loss": 0.610471785068512, + "eval_runtime": 40.4875, + "eval_samples_per_second": 3.977, + "eval_steps_per_second": 0.074, + "eval_wer": 32.64311814859927, + "step": 26000 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 1.0649, + "step": 26050 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0508, + "step": 26100 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0518, + "step": 26150 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0587, + "step": 26200 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.05, + "step": 26250 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0449, + "step": 26300 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0514, + "step": 26350 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0542, + "step": 26400 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0587, + "step": 26450 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0566, + "step": 26500 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0393, + "step": 26550 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0537, + "step": 26600 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0457, + "step": 26650 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0475, + "step": 26700 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0591, + "step": 26750 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0476, + "step": 26800 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0537, + "step": 26850 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0414, + "step": 26900 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0596, + "step": 26950 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.0493, + "step": 27000 + }, + { + "epoch": 2.05, + "eval_loss": 0.6040655970573425, + "eval_runtime": 38.168, + "eval_samples_per_second": 4.218, + "eval_steps_per_second": 0.079, + "eval_wer": 32.734470158343484, + "step": 27000 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 1.048, + "step": 27050 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0542, + "step": 27100 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0474, + "step": 27150 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0424, + "step": 27200 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0337, + "step": 27250 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0424, + "step": 27300 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0342, + "step": 27350 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0401, + "step": 27400 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0388, + "step": 27450 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0323, + "step": 27500 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0361, + "step": 27550 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0322, + "step": 27600 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0474, + "step": 27650 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.05, + "step": 27700 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0501, + "step": 27750 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0425, + "step": 27800 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0421, + "step": 27850 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0453, + "step": 27900 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0352, + "step": 27950 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.047, + "step": 28000 + }, + { + "epoch": 2.06, + "eval_loss": 0.6039990186691284, + "eval_runtime": 39.0034, + "eval_samples_per_second": 4.128, + "eval_steps_per_second": 0.077, + "eval_wer": 32.76492082825822, + "step": 28000 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 1.0366, + "step": 28050 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0588, + "step": 28100 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0429, + "step": 28150 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0461, + "step": 28200 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0347, + "step": 28250 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.038, + "step": 28300 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0361, + "step": 28350 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0226, + "step": 28400 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0337, + "step": 28450 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0351, + "step": 28500 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0279, + "step": 28550 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0411, + "step": 28600 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0384, + "step": 28650 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0434, + "step": 28700 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0414, + "step": 28750 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0305, + "step": 28800 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0301, + "step": 28850 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0293, + "step": 28900 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0324, + "step": 28950 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0323, + "step": 29000 + }, + { + "epoch": 2.07, + "eval_loss": 0.5984179377555847, + "eval_runtime": 38.0029, + "eval_samples_per_second": 4.237, + "eval_steps_per_second": 0.079, + "eval_wer": 31.60779537149817, + "step": 29000 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 1.0444, + "step": 29050 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0328, + "step": 29100 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0334, + "step": 29150 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0332, + "step": 29200 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0297, + "step": 29250 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0441, + "step": 29300 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0319, + "step": 29350 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0454, + "step": 29400 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0286, + "step": 29450 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0227, + "step": 29500 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0299, + "step": 29550 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0317, + "step": 29600 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0227, + "step": 29650 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0374, + "step": 29700 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0352, + "step": 29750 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0176, + "step": 29800 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 1.0285, + "step": 29850 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 1.0243, + "step": 29900 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 1.0192, + "step": 29950 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 1.0189, + "step": 30000 + }, + { + "epoch": 3.0, + "eval_loss": 0.5957360863685608, + "eval_runtime": 30.8132, + "eval_samples_per_second": 5.225, + "eval_steps_per_second": 0.097, + "eval_wer": 31.303288672350792, + "step": 30000 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 1.0133, + "step": 30050 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 1.0144, + "step": 30100 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 1.0279, + "step": 30150 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 1.0148, + "step": 30200 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 1.0192, + "step": 30250 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 1.0246, + "step": 30300 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 1.026, + "step": 30350 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0173, + "step": 30400 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0273, + "step": 30450 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0306, + "step": 30500 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0115, + "step": 30550 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0172, + "step": 30600 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0068, + "step": 30650 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0066, + "step": 30700 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0204, + "step": 30750 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0206, + "step": 30800 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0158, + "step": 30850 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0117, + "step": 30900 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0096, + "step": 30950 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0078, + "step": 31000 + }, + { + "epoch": 3.01, + "eval_loss": 0.5924085378646851, + "eval_runtime": 31.4548, + "eval_samples_per_second": 5.118, + "eval_steps_per_second": 0.095, + "eval_wer": 31.425091352009744, + "step": 31000 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0176, + "step": 31050 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0132, + "step": 31100 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.02, + "step": 31150 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0144, + "step": 31200 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0031, + "step": 31250 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0067, + "step": 31300 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 1.0025, + "step": 31350 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0099, + "step": 31400 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0186, + "step": 31450 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0235, + "step": 31500 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.015, + "step": 31550 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0154, + "step": 31600 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0117, + "step": 31650 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0027, + "step": 31700 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0134, + "step": 31750 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0055, + "step": 31800 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0027, + "step": 31850 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0099, + "step": 31900 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0037, + "step": 31950 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0146, + "step": 32000 + }, + { + "epoch": 3.02, + "eval_loss": 0.594041645526886, + "eval_runtime": 32.4443, + "eval_samples_per_second": 4.962, + "eval_steps_per_second": 0.092, + "eval_wer": 31.303288672350792, + "step": 32000 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0069, + "step": 32050 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0082, + "step": 32100 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0154, + "step": 32150 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0227, + "step": 32200 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.9945, + "step": 32250 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0048, + "step": 32300 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 1.0101, + "step": 32350 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.9971, + "step": 32400 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0, + "step": 32450 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.006, + "step": 32500 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0083, + "step": 32550 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.006, + "step": 32600 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0073, + "step": 32650 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0048, + "step": 32700 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0015, + "step": 32750 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0101, + "step": 32800 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.01, + "step": 32850 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0133, + "step": 32900 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0069, + "step": 32950 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0128, + "step": 33000 + }, + { + "epoch": 3.03, + "eval_loss": 0.5891727805137634, + "eval_runtime": 32.6258, + "eval_samples_per_second": 4.935, + "eval_steps_per_second": 0.092, + "eval_wer": 31.02923264311815, + "step": 33000 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0075, + "step": 33050 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0175, + "step": 33100 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0083, + "step": 33150 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.9995, + "step": 33200 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.9996, + "step": 33250 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0072, + "step": 33300 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 1.0196, + "step": 33350 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0071, + "step": 33400 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0073, + "step": 33450 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.012, + "step": 33500 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0137, + "step": 33550 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.9993, + "step": 33600 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0025, + "step": 33650 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0055, + "step": 33700 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0013, + "step": 33750 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0068, + "step": 33800 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0018, + "step": 33850 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.9998, + "step": 33900 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.9934, + "step": 33950 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0025, + "step": 34000 + }, + { + "epoch": 3.04, + "eval_loss": 0.5873314738273621, + "eval_runtime": 32.6141, + "eval_samples_per_second": 4.937, + "eval_steps_per_second": 0.092, + "eval_wer": 31.181485992691837, + "step": 34000 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0072, + "step": 34050 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.9934, + "step": 34100 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0179, + "step": 34150 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.9992, + "step": 34200 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.9973, + "step": 34250 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0184, + "step": 34300 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 1.0049, + "step": 34350 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.009, + "step": 34400 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.0099, + "step": 34450 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.0086, + "step": 34500 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.0038, + "step": 34550 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.9973, + "step": 34600 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.0189, + "step": 34650 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.0017, + "step": 34700 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.0013, + "step": 34750 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.9962, + "step": 34800 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.0062, + "step": 34850 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.9956, + "step": 34900 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.9908, + "step": 34950 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.999, + "step": 35000 + }, + { + "epoch": 3.05, + "eval_loss": 0.5838064551353455, + "eval_runtime": 29.7975, + "eval_samples_per_second": 5.403, + "eval_steps_per_second": 0.101, + "eval_wer": 30.633373934226555, + "step": 35000 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.0019, + "step": 35050 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.0049, + "step": 35100 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.0116, + "step": 35150 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.0041, + "step": 35200 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.0047, + "step": 35250 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 1.003, + "step": 35300 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.9984, + "step": 35350 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 1.0072, + "step": 35400 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.9937, + "step": 35450 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.9983, + "step": 35500 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 1.0007, + "step": 35550 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.998, + "step": 35600 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.9986, + "step": 35650 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 1.0009, + "step": 35700 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.9982, + "step": 35750 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 1.0009, + "step": 35800 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.9946, + "step": 35850 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 1.0014, + "step": 35900 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 1.0009, + "step": 35950 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 1.0045, + "step": 36000 + }, + { + "epoch": 3.06, + "eval_loss": 0.5799316167831421, + "eval_runtime": 35.1205, + "eval_samples_per_second": 4.584, + "eval_steps_per_second": 0.085, + "eval_wer": 30.420219244823386, + "step": 36000 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.9923, + "step": 36050 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.9973, + "step": 36100 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.9879, + "step": 36150 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.9857, + "step": 36200 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.9931, + "step": 36250 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.9926, + "step": 36300 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.9892, + "step": 36350 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.982, + "step": 36400 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9962, + "step": 36450 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9871, + "step": 36500 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9868, + "step": 36550 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 1.0011, + "step": 36600 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9873, + "step": 36650 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9946, + "step": 36700 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 1.004, + "step": 36750 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9963, + "step": 36800 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9822, + "step": 36850 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9949, + "step": 36900 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9866, + "step": 36950 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 1.0005, + "step": 37000 + }, + { + "epoch": 3.07, + "eval_loss": 0.5770267248153687, + "eval_runtime": 32.6377, + "eval_samples_per_second": 4.933, + "eval_steps_per_second": 0.092, + "eval_wer": 30.176613885505482, + "step": 37000 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9882, + "step": 37050 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9887, + "step": 37100 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9863, + "step": 37150 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9874, + "step": 37200 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.996, + "step": 37250 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 1.0031, + "step": 37300 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.9861, + "step": 37350 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.9843, + "step": 37400 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.9873, + "step": 37450 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.9939, + "step": 37500 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.9777, + "step": 37550 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.9785, + "step": 37600 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 1.0, + "step": 37650 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.9911, + "step": 37700 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.9952, + "step": 37750 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.9814, + "step": 37800 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.985, + "step": 37850 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.9881, + "step": 37900 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.9882, + "step": 37950 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 1.0017, + "step": 38000 + }, + { + "epoch": 3.08, + "eval_loss": 0.5733200907707214, + "eval_runtime": 38.0152, + "eval_samples_per_second": 4.235, + "eval_steps_per_second": 0.079, + "eval_wer": 29.65895249695493, + "step": 38000 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.987, + "step": 38050 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.9886, + "step": 38100 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.9859, + "step": 38150 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.9752, + "step": 38200 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.9817, + "step": 38250 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.9751, + "step": 38300 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.9714, + "step": 38350 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.9681, + "step": 38400 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.9914, + "step": 38450 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.9775, + "step": 38500 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.9779, + "step": 38550 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.9697, + "step": 38600 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.979, + "step": 38650 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9747, + "step": 38700 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9777, + "step": 38750 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.973, + "step": 38800 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9783, + "step": 38850 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9719, + "step": 38900 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9732, + "step": 38950 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9878, + "step": 39000 + }, + { + "epoch": 4.01, + "eval_loss": 0.5744524002075195, + "eval_runtime": 41.2775, + "eval_samples_per_second": 3.9, + "eval_steps_per_second": 0.073, + "eval_wer": 30.267965895249695, + "step": 39000 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9827, + "step": 39050 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9796, + "step": 39100 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9726, + "step": 39150 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.984, + "step": 39200 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9739, + "step": 39250 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9885, + "step": 39300 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9724, + "step": 39350 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9701, + "step": 39400 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9749, + "step": 39450 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.965, + "step": 39500 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9805, + "step": 39550 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9829, + "step": 39600 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.9782, + "step": 39650 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9672, + "step": 39700 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9763, + "step": 39750 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9795, + "step": 39800 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9731, + "step": 39850 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9693, + "step": 39900 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9708, + "step": 39950 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9854, + "step": 40000 + }, + { + "epoch": 4.02, + "eval_loss": 0.5719765424728394, + "eval_runtime": 38.4572, + "eval_samples_per_second": 4.186, + "eval_steps_per_second": 0.078, + "eval_wer": 30.054811205846526, + "step": 40000 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9628, + "step": 40050 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9651, + "step": 40100 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9627, + "step": 40150 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9572, + "step": 40200 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9681, + "step": 40250 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9642, + "step": 40300 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9817, + "step": 40350 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9654, + "step": 40400 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9799, + "step": 40450 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.973, + "step": 40500 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9749, + "step": 40550 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9651, + "step": 40600 + }, + { + "epoch": 4.02, + "learning_rate": 3e-06, + "loss": 0.9686, + "step": 40650 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9808, + "step": 40700 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9778, + "step": 40750 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9736, + "step": 40800 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9767, + "step": 40850 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9866, + "step": 40900 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9708, + "step": 40950 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9624, + "step": 41000 + }, + { + "epoch": 4.03, + "eval_loss": 0.5703173279762268, + "eval_runtime": 39.8395, + "eval_samples_per_second": 4.041, + "eval_steps_per_second": 0.075, + "eval_wer": 29.598051157125454, + "step": 41000 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.975, + "step": 41050 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9664, + "step": 41100 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9583, + "step": 41150 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9644, + "step": 41200 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.967, + "step": 41250 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9632, + "step": 41300 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9534, + "step": 41350 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.967, + "step": 41400 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9712, + "step": 41450 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.966, + "step": 41500 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9675, + "step": 41550 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9728, + "step": 41600 + }, + { + "epoch": 4.03, + "learning_rate": 3e-06, + "loss": 0.9588, + "step": 41650 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9715, + "step": 41700 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9731, + "step": 41750 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9717, + "step": 41800 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9789, + "step": 41850 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9653, + "step": 41900 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9616, + "step": 41950 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9639, + "step": 42000 + }, + { + "epoch": 4.04, + "eval_loss": 0.5681419968605042, + "eval_runtime": 39.2115, + "eval_samples_per_second": 4.106, + "eval_steps_per_second": 0.077, + "eval_wer": 29.506699147381244, + "step": 42000 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.956, + "step": 42050 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9585, + "step": 42100 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9598, + "step": 42150 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9657, + "step": 42200 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.975, + "step": 42250 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9592, + "step": 42300 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9659, + "step": 42350 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9548, + "step": 42400 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9556, + "step": 42450 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9593, + "step": 42500 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9671, + "step": 42550 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9665, + "step": 42600 + }, + { + "epoch": 4.04, + "learning_rate": 3e-06, + "loss": 0.9625, + "step": 42650 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.962, + "step": 42700 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9502, + "step": 42750 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9677, + "step": 42800 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9566, + "step": 42850 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9614, + "step": 42900 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9663, + "step": 42950 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9569, + "step": 43000 + }, + { + "epoch": 4.05, + "eval_loss": 0.5678820013999939, + "eval_runtime": 39.7895, + "eval_samples_per_second": 4.046, + "eval_steps_per_second": 0.075, + "eval_wer": 29.628501827040193, + "step": 43000 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9676, + "step": 43050 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.966, + "step": 43100 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9577, + "step": 43150 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9658, + "step": 43200 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9663, + "step": 43250 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9638, + "step": 43300 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9635, + "step": 43350 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9631, + "step": 43400 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9602, + "step": 43450 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9619, + "step": 43500 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9521, + "step": 43550 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.96, + "step": 43600 + }, + { + "epoch": 4.05, + "learning_rate": 3e-06, + "loss": 0.9672, + "step": 43650 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9528, + "step": 43700 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9627, + "step": 43750 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9595, + "step": 43800 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9695, + "step": 43850 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9641, + "step": 43900 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.958, + "step": 43950 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9682, + "step": 44000 + }, + { + "epoch": 4.06, + "eval_loss": 0.5643439292907715, + "eval_runtime": 39.2385, + "eval_samples_per_second": 4.103, + "eval_steps_per_second": 0.076, + "eval_wer": 29.567600487210722, + "step": 44000 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9527, + "step": 44050 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.96, + "step": 44100 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9641, + "step": 44150 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9748, + "step": 44200 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9659, + "step": 44250 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9565, + "step": 44300 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9606, + "step": 44350 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9561, + "step": 44400 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9578, + "step": 44450 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9665, + "step": 44500 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9657, + "step": 44550 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9535, + "step": 44600 + }, + { + "epoch": 4.06, + "learning_rate": 3e-06, + "loss": 0.9457, + "step": 44650 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9564, + "step": 44700 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9508, + "step": 44750 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9576, + "step": 44800 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9515, + "step": 44850 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9616, + "step": 44900 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9585, + "step": 44950 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9539, + "step": 45000 + }, + { + "epoch": 4.07, + "eval_loss": 0.5601379871368408, + "eval_runtime": 39.9689, + "eval_samples_per_second": 4.028, + "eval_steps_per_second": 0.075, + "eval_wer": 29.567600487210722, + "step": 45000 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9527, + "step": 45050 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9553, + "step": 45100 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.962, + "step": 45150 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.96, + "step": 45200 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9679, + "step": 45250 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9522, + "step": 45300 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9536, + "step": 45350 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9557, + "step": 45400 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9501, + "step": 45450 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9559, + "step": 45500 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9539, + "step": 45550 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9556, + "step": 45600 + }, + { + "epoch": 4.07, + "learning_rate": 3e-06, + "loss": 0.9492, + "step": 45650 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9678, + "step": 45700 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9634, + "step": 45750 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9571, + "step": 45800 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9508, + "step": 45850 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9468, + "step": 45900 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9511, + "step": 45950 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.946, + "step": 46000 + }, + { + "epoch": 4.08, + "eval_loss": 0.5562007427215576, + "eval_runtime": 40.2757, + "eval_samples_per_second": 3.997, + "eval_steps_per_second": 0.074, + "eval_wer": 29.71985383678441, + "step": 46000 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.954, + "step": 46050 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9573, + "step": 46100 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9508, + "step": 46150 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9633, + "step": 46200 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9547, + "step": 46250 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9515, + "step": 46300 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9551, + "step": 46350 + }, + { + "epoch": 4.08, + "learning_rate": 3e-06, + "loss": 0.9544, + "step": 46400 + }, + { + "epoch": 5.0, + "learning_rate": 3e-06, + "loss": 0.9554, + "step": 46450 + }, + { + "epoch": 5.0, + "learning_rate": 3e-06, + "loss": 0.9445, + "step": 46500 + }, + { + "epoch": 5.0, + "learning_rate": 3e-06, + "loss": 0.9536, + "step": 46550 + }, + { + "epoch": 5.0, + "learning_rate": 3e-06, + "loss": 0.9375, + "step": 46600 + }, + { + "epoch": 5.0, + "learning_rate": 3e-06, + "loss": 0.9414, + "step": 46650 + }, + { + "epoch": 5.0, + "learning_rate": 3e-06, + "loss": 0.9306, + "step": 46700 + }, + { + "epoch": 5.0, + "learning_rate": 3e-06, + "loss": 0.938, + "step": 46750 + }, + { + "epoch": 5.0, + "learning_rate": 3e-06, + "loss": 0.9453, + "step": 46800 + }, + { + "epoch": 5.0, + "learning_rate": 3e-06, + "loss": 0.9366, + "step": 46850 + }, + { + "epoch": 5.0, + "learning_rate": 3e-06, + "loss": 0.935, + "step": 46900 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9403, + "step": 46950 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9429, + "step": 47000 + }, + { + "epoch": 5.01, + "eval_loss": 0.5591687560081482, + "eval_runtime": 38.1863, + "eval_samples_per_second": 4.216, + "eval_steps_per_second": 0.079, + "eval_wer": 29.293544457978076, + "step": 47000 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9423, + "step": 47050 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9492, + "step": 47100 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9528, + "step": 47150 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9372, + "step": 47200 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9502, + "step": 47250 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9487, + "step": 47300 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.942, + "step": 47350 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9428, + "step": 47400 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9332, + "step": 47450 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.931, + "step": 47500 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.939, + "step": 47550 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9487, + "step": 47600 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9494, + "step": 47650 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9377, + "step": 47700 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9346, + "step": 47750 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9398, + "step": 47800 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.9444, + "step": 47850 + }, + { + "epoch": 5.01, + "learning_rate": 3e-06, + "loss": 0.938, + "step": 47900 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9552, + "step": 47950 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9462, + "step": 48000 + }, + { + "epoch": 5.02, + "eval_loss": 0.553955078125, + "eval_runtime": 38.7517, + "eval_samples_per_second": 4.155, + "eval_steps_per_second": 0.077, + "eval_wer": 29.08038976857491, + "step": 48000 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9412, + "step": 48050 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9428, + "step": 48100 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.938, + "step": 48150 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.944, + "step": 48200 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9342, + "step": 48250 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9285, + "step": 48300 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9444, + "step": 48350 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9451, + "step": 48400 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.947, + "step": 48450 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9345, + "step": 48500 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9345, + "step": 48550 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9345, + "step": 48600 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9357, + "step": 48650 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9329, + "step": 48700 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.939, + "step": 48750 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.928, + "step": 48800 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9421, + "step": 48850 + }, + { + "epoch": 5.02, + "learning_rate": 3e-06, + "loss": 0.9341, + "step": 48900 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9404, + "step": 48950 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9312, + "step": 49000 + }, + { + "epoch": 5.03, + "eval_loss": 0.5535339713096619, + "eval_runtime": 37.3366, + "eval_samples_per_second": 4.312, + "eval_steps_per_second": 0.08, + "eval_wer": 29.293544457978076, + "step": 49000 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9387, + "step": 49050 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9342, + "step": 49100 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9565, + "step": 49150 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9475, + "step": 49200 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9436, + "step": 49250 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9334, + "step": 49300 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9391, + "step": 49350 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9501, + "step": 49400 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9388, + "step": 49450 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9229, + "step": 49500 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9357, + "step": 49550 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9329, + "step": 49600 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.941, + "step": 49650 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9445, + "step": 49700 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9346, + "step": 49750 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9375, + "step": 49800 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9466, + "step": 49850 + }, + { + "epoch": 5.03, + "learning_rate": 3e-06, + "loss": 0.9282, + "step": 49900 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9481, + "step": 49950 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9462, + "step": 50000 + }, + { + "epoch": 5.04, + "eval_loss": 0.5536479949951172, + "eval_runtime": 40.3035, + "eval_samples_per_second": 3.995, + "eval_steps_per_second": 0.074, + "eval_wer": 28.68453105968331, + "step": 50000 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9388, + "step": 50050 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9368, + "step": 50100 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9338, + "step": 50150 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9355, + "step": 50200 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9375, + "step": 50250 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9436, + "step": 50300 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9347, + "step": 50350 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9285, + "step": 50400 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9388, + "step": 50450 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9442, + "step": 50500 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9356, + "step": 50550 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9309, + "step": 50600 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9289, + "step": 50650 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9342, + "step": 50700 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9484, + "step": 50750 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9295, + "step": 50800 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9277, + "step": 50850 + }, + { + "epoch": 5.04, + "learning_rate": 3e-06, + "loss": 0.9389, + "step": 50900 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9365, + "step": 50950 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.922, + "step": 51000 + }, + { + "epoch": 5.05, + "eval_loss": 0.5538690686225891, + "eval_runtime": 37.226, + "eval_samples_per_second": 4.325, + "eval_steps_per_second": 0.081, + "eval_wer": 28.714981729598048, + "step": 51000 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9281, + "step": 51050 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9453, + "step": 51100 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9445, + "step": 51150 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9396, + "step": 51200 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9404, + "step": 51250 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9301, + "step": 51300 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9286, + "step": 51350 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9303, + "step": 51400 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9348, + "step": 51450 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9356, + "step": 51500 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9283, + "step": 51550 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9432, + "step": 51600 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9416, + "step": 51650 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9319, + "step": 51700 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.925, + "step": 51750 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.95, + "step": 51800 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9148, + "step": 51850 + }, + { + "epoch": 5.05, + "learning_rate": 3e-06, + "loss": 0.9348, + "step": 51900 + }, + { + "epoch": 5.06, + "learning_rate": 3e-06, + "loss": 0.9395, + "step": 51950 + }, + { + "epoch": 5.06, + "learning_rate": 3e-06, + "loss": 0.9253, + "step": 52000 + }, + { + "epoch": 5.06, + "eval_loss": 0.5509808659553528, + "eval_runtime": 35.2782, + "eval_samples_per_second": 4.564, + "eval_steps_per_second": 0.085, + "eval_wer": 28.836784409257003, + "step": 52000 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.9213, + "step": 52050 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.9149, + "step": 52100 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.9275, + "step": 52150 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.9333, + "step": 52200 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.9209, + "step": 52250 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.9104, + "step": 52300 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.9305, + "step": 52350 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.9208, + "step": 52400 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.9311, + "step": 52450 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9315, + "step": 52500 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9281, + "step": 52550 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9199, + "step": 52600 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9235, + "step": 52650 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.934, + "step": 52700 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.916, + "step": 52750 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9148, + "step": 52800 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9259, + "step": 52850 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9238, + "step": 52900 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9224, + "step": 52950 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9065, + "step": 53000 + }, + { + "epoch": 0.01, + "eval_loss": 0.5493320226669312, + "eval_runtime": 40.3992, + "eval_samples_per_second": 3.985, + "eval_steps_per_second": 0.074, + "eval_wer": 28.5931790499391, + "step": 53000 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9211, + "step": 53050 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9196, + "step": 53100 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9215, + "step": 53150 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9177, + "step": 53200 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9131, + "step": 53250 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9228, + "step": 53300 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9292, + "step": 53350 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9221, + "step": 53400 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9316, + "step": 53450 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.9125, + "step": 53500 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9298, + "step": 53550 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9227, + "step": 53600 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9116, + "step": 53650 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9297, + "step": 53700 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.933, + "step": 53750 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9221, + "step": 53800 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.919, + "step": 53850 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9263, + "step": 53900 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9159, + "step": 53950 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9096, + "step": 54000 + }, + { + "epoch": 0.02, + "eval_loss": 0.5489500164985657, + "eval_runtime": 39.6766, + "eval_samples_per_second": 4.058, + "eval_steps_per_second": 0.076, + "eval_wer": 28.501827040194883, + "step": 54000 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9126, + "step": 54050 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9237, + "step": 54100 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9134, + "step": 54150 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9093, + "step": 54200 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9241, + "step": 54250 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9302, + "step": 54300 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9209, + "step": 54350 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9197, + "step": 54400 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.9119, + "step": 54450 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9085, + "step": 54500 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9109, + "step": 54550 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9121, + "step": 54600 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.927, + "step": 54650 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9125, + "step": 54700 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9125, + "step": 54750 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.917, + "step": 54800 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9195, + "step": 54850 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9271, + "step": 54900 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9256, + "step": 54950 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9329, + "step": 55000 + }, + { + "epoch": 0.03, + "eval_loss": 0.5482864379882812, + "eval_runtime": 37.1746, + "eval_samples_per_second": 4.331, + "eval_steps_per_second": 0.081, + "eval_wer": 28.288672350791717, + "step": 55000 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9241, + "step": 55050 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9263, + "step": 55100 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9189, + "step": 55150 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9318, + "step": 55200 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9092, + "step": 55250 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9066, + "step": 55300 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9141, + "step": 55350 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9221, + "step": 55400 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9318, + "step": 55450 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9198, + "step": 55500 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9211, + "step": 55550 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9102, + "step": 55600 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9127, + "step": 55650 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9206, + "step": 55700 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9129, + "step": 55750 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.913, + "step": 55800 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9265, + "step": 55850 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9235, + "step": 55900 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9131, + "step": 55950 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9181, + "step": 56000 + }, + { + "epoch": 0.04, + "eval_loss": 0.5471073985099792, + "eval_runtime": 39.0291, + "eval_samples_per_second": 4.125, + "eval_steps_per_second": 0.077, + "eval_wer": 27.98416565164434, + "step": 56000 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.932, + "step": 56050 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9324, + "step": 56100 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9159, + "step": 56150 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9267, + "step": 56200 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9172, + "step": 56250 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9271, + "step": 56300 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.918, + "step": 56350 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9301, + "step": 56400 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.931, + "step": 56450 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.9095, + "step": 56500 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9182, + "step": 56550 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9115, + "step": 56600 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9187, + "step": 56650 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9336, + "step": 56700 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9269, + "step": 56750 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.915, + "step": 56800 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9263, + "step": 56850 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9049, + "step": 56900 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9169, + "step": 56950 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.914, + "step": 57000 + }, + { + "epoch": 0.05, + "eval_loss": 0.545651912689209, + "eval_runtime": 38.4894, + "eval_samples_per_second": 4.183, + "eval_steps_per_second": 0.078, + "eval_wer": 28.410475030450673, + "step": 57000 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.92, + "step": 57050 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9217, + "step": 57100 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9247, + "step": 57150 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9188, + "step": 57200 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9181, + "step": 57250 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9123, + "step": 57300 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9202, + "step": 57350 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9264, + "step": 57400 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.9157, + "step": 57450 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9074, + "step": 57500 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9108, + "step": 57550 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.908, + "step": 57600 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9189, + "step": 57650 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9189, + "step": 57700 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9135, + "step": 57750 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9157, + "step": 57800 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9218, + "step": 57850 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9147, + "step": 57900 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9228, + "step": 57950 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9149, + "step": 58000 + }, + { + "epoch": 0.06, + "eval_loss": 0.5448886752128601, + "eval_runtime": 39.6395, + "eval_samples_per_second": 4.062, + "eval_steps_per_second": 0.076, + "eval_wer": 27.588306942752737, + "step": 58000 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9191, + "step": 58050 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9215, + "step": 58100 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9133, + "step": 58150 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9152, + "step": 58200 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9034, + "step": 58250 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9183, + "step": 58300 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9137, + "step": 58350 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9186, + "step": 58400 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.9166, + "step": 58450 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9153, + "step": 58500 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9159, + "step": 58550 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.921, + "step": 58600 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9109, + "step": 58650 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9079, + "step": 58700 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9129, + "step": 58750 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9141, + "step": 58800 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9191, + "step": 58850 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9078, + "step": 58900 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9067, + "step": 58950 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9092, + "step": 59000 + }, + { + "epoch": 0.07, + "eval_loss": 0.5405263304710388, + "eval_runtime": 39.5678, + "eval_samples_per_second": 4.069, + "eval_steps_per_second": 0.076, + "eval_wer": 27.831912302070645, + "step": 59000 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9125, + "step": 59050 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9044, + "step": 59100 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9086, + "step": 59150 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9177, + "step": 59200 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9152, + "step": 59250 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9193, + "step": 59300 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9148, + "step": 59350 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9, + "step": 59400 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9084, + "step": 59450 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.9054, + "step": 59500 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8991, + "step": 59550 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9028, + "step": 59600 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9068, + "step": 59650 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9252, + "step": 59700 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9156, + "step": 59750 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9235, + "step": 59800 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.903, + "step": 59850 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9125, + "step": 59900 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9124, + "step": 59950 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9101, + "step": 60000 + }, + { + "epoch": 0.08, + "eval_loss": 0.540199339389801, + "eval_runtime": 38.3011, + "eval_samples_per_second": 4.204, + "eval_steps_per_second": 0.078, + "eval_wer": 27.344701583434833, + "step": 60000 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9211, + "step": 60050 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9131, + "step": 60100 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9119, + "step": 60150 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9098, + "step": 60200 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.9096, + "step": 60250 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.9053, + "step": 60300 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.9087, + "step": 60350 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.8995, + "step": 60400 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.8805, + "step": 60450 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.8959, + "step": 60500 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.9045, + "step": 60550 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.9092, + "step": 60600 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.9041, + "step": 60650 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.912, + "step": 60700 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.9056, + "step": 60750 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8947, + "step": 60800 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.9007, + "step": 60850 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8982, + "step": 60900 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.9015, + "step": 60950 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.9046, + "step": 61000 + }, + { + "epoch": 1.01, + "eval_loss": 0.5374026298522949, + "eval_runtime": 40.3306, + "eval_samples_per_second": 3.992, + "eval_steps_per_second": 0.074, + "eval_wer": 27.557856272838006, + "step": 61000 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8987, + "step": 61050 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.9055, + "step": 61100 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.9051, + "step": 61150 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8914, + "step": 61200 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8905, + "step": 61250 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8891, + "step": 61300 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.9029, + "step": 61350 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.9086, + "step": 61400 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.9094, + "step": 61450 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.9161, + "step": 61500 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.9026, + "step": 61550 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8919, + "step": 61600 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8966, + "step": 61650 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8983, + "step": 61700 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8964, + "step": 61750 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8927, + "step": 61800 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.9035, + "step": 61850 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8993, + "step": 61900 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8943, + "step": 61950 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8917, + "step": 62000 + }, + { + "epoch": 1.02, + "eval_loss": 0.5390424132347107, + "eval_runtime": 37.0496, + "eval_samples_per_second": 4.346, + "eval_steps_per_second": 0.081, + "eval_wer": 27.740560292326432, + "step": 62000 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.9055, + "step": 62050 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.9059, + "step": 62100 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.9021, + "step": 62150 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8927, + "step": 62200 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.901, + "step": 62250 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8946, + "step": 62300 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.9117, + "step": 62350 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8972, + "step": 62400 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8969, + "step": 62450 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8873, + "step": 62500 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.9085, + "step": 62550 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.9078, + "step": 62600 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8987, + "step": 62650 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8939, + "step": 62700 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8953, + "step": 62750 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8906, + "step": 62800 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.9033, + "step": 62850 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8981, + "step": 62900 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.9033, + "step": 62950 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8993, + "step": 63000 + }, + { + "epoch": 1.03, + "eval_loss": 0.5385776162147522, + "eval_runtime": 39.1791, + "eval_samples_per_second": 4.109, + "eval_steps_per_second": 0.077, + "eval_wer": 27.40560292326431, + "step": 63000 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.9011, + "step": 63050 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.9103, + "step": 63100 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.9001, + "step": 63150 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8957, + "step": 63200 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.9021, + "step": 63250 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8933, + "step": 63300 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8868, + "step": 63350 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.887, + "step": 63400 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8961, + "step": 63450 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.9008, + "step": 63500 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8922, + "step": 63550 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8862, + "step": 63600 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8865, + "step": 63650 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.9023, + "step": 63700 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8946, + "step": 63750 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8954, + "step": 63800 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8864, + "step": 63850 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8879, + "step": 63900 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8884, + "step": 63950 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8875, + "step": 64000 + }, + { + "epoch": 1.04, + "eval_loss": 0.5361135601997375, + "eval_runtime": 37.6948, + "eval_samples_per_second": 4.271, + "eval_steps_per_second": 0.08, + "eval_wer": 26.857490864799026, + "step": 64000 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.9006, + "step": 64050 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8948, + "step": 64100 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8909, + "step": 64150 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8905, + "step": 64200 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8878, + "step": 64250 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8985, + "step": 64300 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.9024, + "step": 64350 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8999, + "step": 64400 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.9004, + "step": 64450 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8974, + "step": 64500 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8914, + "step": 64550 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.9016, + "step": 64600 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8866, + "step": 64650 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8997, + "step": 64700 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8828, + "step": 64750 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.9024, + "step": 64800 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8879, + "step": 64850 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.907, + "step": 64900 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.9021, + "step": 64950 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8892, + "step": 65000 + }, + { + "epoch": 1.05, + "eval_loss": 0.5358411073684692, + "eval_runtime": 38.731, + "eval_samples_per_second": 4.157, + "eval_steps_per_second": 0.077, + "eval_wer": 27.344701583434833, + "step": 65000 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.9063, + "step": 65050 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8907, + "step": 65100 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8849, + "step": 65150 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8857, + "step": 65200 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8937, + "step": 65250 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8851, + "step": 65300 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8878, + "step": 65350 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.9036, + "step": 65400 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.91, + "step": 65450 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8864, + "step": 65500 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8888, + "step": 65550 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8933, + "step": 65600 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8898, + "step": 65650 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.9006, + "step": 65700 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8957, + "step": 65750 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8887, + "step": 65800 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8967, + "step": 65850 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.895, + "step": 65900 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8955, + "step": 65950 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8929, + "step": 66000 + }, + { + "epoch": 1.06, + "eval_loss": 0.5346468091011047, + "eval_runtime": 38.4533, + "eval_samples_per_second": 4.187, + "eval_steps_per_second": 0.078, + "eval_wer": 26.73568818514007, + "step": 66000 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.8919, + "step": 66050 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.8799, + "step": 66100 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.8854, + "step": 66150 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.884, + "step": 66200 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.8742, + "step": 66250 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.8693, + "step": 66300 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.895, + "step": 66350 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.886, + "step": 66400 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.8899, + "step": 66450 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8952, + "step": 66500 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8939, + "step": 66550 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8787, + "step": 66600 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.881, + "step": 66650 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8903, + "step": 66700 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8749, + "step": 66750 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8794, + "step": 66800 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.899, + "step": 66850 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8862, + "step": 66900 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8834, + "step": 66950 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8703, + "step": 67000 + }, + { + "epoch": 0.01, + "eval_loss": 0.5332406759262085, + "eval_runtime": 38.9701, + "eval_samples_per_second": 4.131, + "eval_steps_per_second": 0.077, + "eval_wer": 26.82704019488429, + "step": 67000 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.891, + "step": 67050 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8921, + "step": 67100 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8917, + "step": 67150 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8824, + "step": 67200 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8735, + "step": 67250 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8824, + "step": 67300 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8919, + "step": 67350 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8903, + "step": 67400 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.8917, + "step": 67450 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.872, + "step": 67500 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.89, + "step": 67550 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8824, + "step": 67600 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8718, + "step": 67650 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8881, + "step": 67700 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8933, + "step": 67750 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8834, + "step": 67800 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.878, + "step": 67850 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8871, + "step": 67900 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8799, + "step": 67950 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8709, + "step": 68000 + }, + { + "epoch": 0.02, + "eval_loss": 0.5335850715637207, + "eval_runtime": 40.527, + "eval_samples_per_second": 3.973, + "eval_steps_per_second": 0.074, + "eval_wer": 26.705237515225335, + "step": 68000 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8754, + "step": 68050 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8951, + "step": 68100 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8765, + "step": 68150 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8698, + "step": 68200 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8834, + "step": 68250 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.891, + "step": 68300 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8845, + "step": 68350 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8918, + "step": 68400 + }, + { + "epoch": 0.02, + "learning_rate": 3e-06, + "loss": 0.8838, + "step": 68450 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8798, + "step": 68500 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8744, + "step": 68550 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.878, + "step": 68600 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8953, + "step": 68650 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8737, + "step": 68700 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8733, + "step": 68750 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8783, + "step": 68800 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8885, + "step": 68850 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8957, + "step": 68900 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8853, + "step": 68950 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8917, + "step": 69000 + }, + { + "epoch": 0.03, + "eval_loss": 0.5328567028045654, + "eval_runtime": 39.7121, + "eval_samples_per_second": 4.054, + "eval_steps_per_second": 0.076, + "eval_wer": 27.070645554202194, + "step": 69000 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8826, + "step": 69050 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8879, + "step": 69100 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8796, + "step": 69150 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8915, + "step": 69200 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8707, + "step": 69250 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8722, + "step": 69300 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8864, + "step": 69350 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.8908, + "step": 69400 + }, + { + "epoch": 0.03, + "learning_rate": 3e-06, + "loss": 0.9004, + "step": 69450 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.89, + "step": 69500 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.885, + "step": 69550 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8716, + "step": 69600 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.876, + "step": 69650 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8831, + "step": 69700 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8746, + "step": 69750 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8718, + "step": 69800 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8868, + "step": 69850 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8855, + "step": 69900 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.884, + "step": 69950 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8867, + "step": 70000 + }, + { + "epoch": 0.04, + "eval_loss": 0.5323489904403687, + "eval_runtime": 38.5858, + "eval_samples_per_second": 4.173, + "eval_steps_per_second": 0.078, + "eval_wer": 26.33982947624848, + "step": 70000 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.892, + "step": 70050 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8939, + "step": 70100 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8801, + "step": 70150 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8955, + "step": 70200 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8804, + "step": 70250 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8877, + "step": 70300 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.887, + "step": 70350 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8989, + "step": 70400 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.8997, + "step": 70450 + }, + { + "epoch": 0.04, + "learning_rate": 3e-06, + "loss": 0.873, + "step": 70500 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8836, + "step": 70550 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8801, + "step": 70600 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8835, + "step": 70650 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8997, + "step": 70700 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8984, + "step": 70750 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8789, + "step": 70800 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8869, + "step": 70850 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8683, + "step": 70900 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8801, + "step": 70950 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8778, + "step": 71000 + }, + { + "epoch": 0.05, + "eval_loss": 0.5315227508544922, + "eval_runtime": 40.5435, + "eval_samples_per_second": 3.971, + "eval_steps_per_second": 0.074, + "eval_wer": 27.28380024360536, + "step": 71000 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8828, + "step": 71050 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8836, + "step": 71100 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8853, + "step": 71150 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8885, + "step": 71200 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8895, + "step": 71250 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.884, + "step": 71300 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8843, + "step": 71350 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8881, + "step": 71400 + }, + { + "epoch": 0.05, + "learning_rate": 3e-06, + "loss": 0.8797, + "step": 71450 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8716, + "step": 71500 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8747, + "step": 71550 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8715, + "step": 71600 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8816, + "step": 71650 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8806, + "step": 71700 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8751, + "step": 71750 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8798, + "step": 71800 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8906, + "step": 71850 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8773, + "step": 71900 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8835, + "step": 71950 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8757, + "step": 72000 + }, + { + "epoch": 0.06, + "eval_loss": 0.5316939353942871, + "eval_runtime": 41.0725, + "eval_samples_per_second": 3.92, + "eval_steps_per_second": 0.073, + "eval_wer": 26.248477466504262, + "step": 72000 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8853, + "step": 72050 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8912, + "step": 72100 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8848, + "step": 72150 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8867, + "step": 72200 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8751, + "step": 72250 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8824, + "step": 72300 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8771, + "step": 72350 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8824, + "step": 72400 + }, + { + "epoch": 0.06, + "learning_rate": 3e-06, + "loss": 0.8848, + "step": 72450 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8821, + "step": 72500 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8774, + "step": 72550 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8841, + "step": 72600 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8749, + "step": 72650 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8716, + "step": 72700 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8805, + "step": 72750 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8838, + "step": 72800 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8822, + "step": 72850 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.872, + "step": 72900 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8699, + "step": 72950 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8726, + "step": 73000 + }, + { + "epoch": 0.07, + "eval_loss": 0.5268862843513489, + "eval_runtime": 38.1185, + "eval_samples_per_second": 4.224, + "eval_steps_per_second": 0.079, + "eval_wer": 26.64433617539586, + "step": 73000 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8761, + "step": 73050 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8675, + "step": 73100 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8775, + "step": 73150 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.889, + "step": 73200 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8847, + "step": 73250 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8843, + "step": 73300 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8765, + "step": 73350 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8638, + "step": 73400 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8724, + "step": 73450 + }, + { + "epoch": 0.07, + "learning_rate": 3e-06, + "loss": 0.8697, + "step": 73500 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8634, + "step": 73550 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8669, + "step": 73600 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8701, + "step": 73650 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8936, + "step": 73700 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8845, + "step": 73750 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8858, + "step": 73800 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8662, + "step": 73850 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8769, + "step": 73900 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8843, + "step": 73950 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8792, + "step": 74000 + }, + { + "epoch": 0.08, + "eval_loss": 0.526807427406311, + "eval_runtime": 38.075, + "eval_samples_per_second": 4.229, + "eval_steps_per_second": 0.079, + "eval_wer": 26.15712545676005, + "step": 74000 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8865, + "step": 74050 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8765, + "step": 74100 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8757, + "step": 74150 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8743, + "step": 74200 + }, + { + "epoch": 0.08, + "learning_rate": 3e-06, + "loss": 0.8726, + "step": 74250 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.869, + "step": 74300 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.8727, + "step": 74350 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.8629, + "step": 74400 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.8445, + "step": 74450 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.8617, + "step": 74500 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.8703, + "step": 74550 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.8754, + "step": 74600 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.8683, + "step": 74650 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.8766, + "step": 74700 + }, + { + "epoch": 1.0, + "learning_rate": 3e-06, + "loss": 0.8712, + "step": 74750 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8589, + "step": 74800 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8652, + "step": 74850 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8646, + "step": 74900 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8675, + "step": 74950 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8706, + "step": 75000 + }, + { + "epoch": 1.01, + "eval_loss": 0.5247220993041992, + "eval_runtime": 39.3966, + "eval_samples_per_second": 4.087, + "eval_steps_per_second": 0.076, + "eval_wer": 26.15712545676005, + "step": 75000 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8645, + "step": 75050 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8716, + "step": 75100 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8705, + "step": 75150 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.858, + "step": 75200 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8568, + "step": 75250 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8549, + "step": 75300 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8668, + "step": 75350 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8727, + "step": 75400 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8738, + "step": 75450 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.88, + "step": 75500 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8669, + "step": 75550 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8576, + "step": 75600 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8612, + "step": 75650 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8623, + "step": 75700 + }, + { + "epoch": 1.01, + "learning_rate": 3e-06, + "loss": 0.8613, + "step": 75750 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8575, + "step": 75800 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8687, + "step": 75850 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8654, + "step": 75900 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8602, + "step": 75950 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8585, + "step": 76000 + }, + { + "epoch": 1.02, + "eval_loss": 0.5264820456504822, + "eval_runtime": 38.9874, + "eval_samples_per_second": 4.13, + "eval_steps_per_second": 0.077, + "eval_wer": 26.370280146163218, + "step": 76000 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8717, + "step": 76050 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8724, + "step": 76100 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8691, + "step": 76150 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8587, + "step": 76200 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8665, + "step": 76250 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8613, + "step": 76300 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8781, + "step": 76350 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8639, + "step": 76400 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8638, + "step": 76450 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8534, + "step": 76500 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8729, + "step": 76550 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8733, + "step": 76600 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.864, + "step": 76650 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8605, + "step": 76700 + }, + { + "epoch": 1.02, + "learning_rate": 3e-06, + "loss": 0.8624, + "step": 76750 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8575, + "step": 76800 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8696, + "step": 76850 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8636, + "step": 76900 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8684, + "step": 76950 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8659, + "step": 77000 + }, + { + "epoch": 1.03, + "eval_loss": 0.5261924862861633, + "eval_runtime": 39.7957, + "eval_samples_per_second": 4.046, + "eval_steps_per_second": 0.075, + "eval_wer": 26.73568818514007, + "step": 77000 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8672, + "step": 77050 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8756, + "step": 77100 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8668, + "step": 77150 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8619, + "step": 77200 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8676, + "step": 77250 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8594, + "step": 77300 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8534, + "step": 77350 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8539, + "step": 77400 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8632, + "step": 77450 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8672, + "step": 77500 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8589, + "step": 77550 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8525, + "step": 77600 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8528, + "step": 77650 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.868, + "step": 77700 + }, + { + "epoch": 1.03, + "learning_rate": 3e-06, + "loss": 0.8608, + "step": 77750 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8622, + "step": 77800 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8536, + "step": 77850 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8548, + "step": 77900 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8552, + "step": 77950 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8551, + "step": 78000 + }, + { + "epoch": 1.04, + "eval_loss": 0.5248793363571167, + "eval_runtime": 38.0291, + "eval_samples_per_second": 4.234, + "eval_steps_per_second": 0.079, + "eval_wer": 26.065773447015832, + "step": 78000 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.868, + "step": 78050 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8623, + "step": 78100 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8586, + "step": 78150 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8573, + "step": 78200 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8553, + "step": 78250 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8659, + "step": 78300 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8696, + "step": 78350 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8676, + "step": 78400 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8669, + "step": 78450 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8631, + "step": 78500 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8586, + "step": 78550 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8691, + "step": 78600 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8537, + "step": 78650 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8671, + "step": 78700 + }, + { + "epoch": 1.04, + "learning_rate": 3e-06, + "loss": 0.8505, + "step": 78750 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8684, + "step": 78800 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8555, + "step": 78850 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8747, + "step": 78900 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8695, + "step": 78950 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8572, + "step": 79000 + }, + { + "epoch": 1.05, + "eval_loss": 0.5249020457267761, + "eval_runtime": 38.8753, + "eval_samples_per_second": 4.141, + "eval_steps_per_second": 0.077, + "eval_wer": 26.278928136419, + "step": 79000 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.874, + "step": 79050 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8587, + "step": 79100 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8527, + "step": 79150 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.853, + "step": 79200 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8614, + "step": 79250 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8535, + "step": 79300 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.856, + "step": 79350 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.871, + "step": 79400 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.877, + "step": 79450 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.854, + "step": 79500 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.856, + "step": 79550 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8606, + "step": 79600 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8567, + "step": 79650 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8673, + "step": 79700 + }, + { + "epoch": 1.05, + "learning_rate": 3e-06, + "loss": 0.8626, + "step": 79750 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8571, + "step": 79800 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8651, + "step": 79850 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8625, + "step": 79900 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8639, + "step": 79950 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8612, + "step": 80000 + }, + { + "epoch": 1.06, + "eval_loss": 0.5234566330909729, + "eval_runtime": 39.62, + "eval_samples_per_second": 4.064, + "eval_steps_per_second": 0.076, + "eval_wer": 25.761266747868454, + "step": 80000 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8742, + "step": 80050 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8658, + "step": 80100 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8611, + "step": 80150 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8718, + "step": 80200 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8697, + "step": 80250 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8722, + "step": 80300 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.873, + "step": 80350 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8662, + "step": 80400 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8656, + "step": 80450 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.87, + "step": 80500 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8658, + "step": 80550 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8703, + "step": 80600 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8718, + "step": 80650 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8589, + "step": 80700 + }, + { + "epoch": 1.06, + "learning_rate": 3e-06, + "loss": 0.8676, + "step": 80750 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8589, + "step": 80800 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8624, + "step": 80850 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8645, + "step": 80900 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8727, + "step": 80950 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8598, + "step": 81000 + }, + { + "epoch": 1.07, + "eval_loss": 0.5208215117454529, + "eval_runtime": 38.4397, + "eval_samples_per_second": 4.188, + "eval_steps_per_second": 0.078, + "eval_wer": 25.700365408038977, + "step": 81000 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.882, + "step": 81050 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8737, + "step": 81100 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8689, + "step": 81150 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8631, + "step": 81200 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8722, + "step": 81250 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8688, + "step": 81300 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8662, + "step": 81350 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8627, + "step": 81400 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8671, + "step": 81450 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8573, + "step": 81500 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.861, + "step": 81550 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.867, + "step": 81600 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8639, + "step": 81650 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8607, + "step": 81700 + }, + { + "epoch": 1.07, + "learning_rate": 3e-06, + "loss": 0.8602, + "step": 81750 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8719, + "step": 81800 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8637, + "step": 81850 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8598, + "step": 81900 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.867, + "step": 81950 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8686, + "step": 82000 + }, + { + "epoch": 1.08, + "eval_loss": 0.5214089155197144, + "eval_runtime": 37.0595, + "eval_samples_per_second": 4.344, + "eval_steps_per_second": 0.081, + "eval_wer": 25.700365408038977, + "step": 82000 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8712, + "step": 82050 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8656, + "step": 82100 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8677, + "step": 82150 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8656, + "step": 82200 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8651, + "step": 82250 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.874, + "step": 82300 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8735, + "step": 82350 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8637, + "step": 82400 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8657, + "step": 82450 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8685, + "step": 82500 + }, + { + "epoch": 1.08, + "learning_rate": 3e-06, + "loss": 0.8534, + "step": 82550 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 0.8645, + "step": 82600 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 0.8448, + "step": 82650 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 0.8565, + "step": 82700 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 0.8537, + "step": 82750 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 0.8496, + "step": 82800 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 0.8512, + "step": 82850 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 0.8483, + "step": 82900 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 0.8476, + "step": 82950 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 0.8503, + "step": 83000 + }, + { + "epoch": 2.0, + "eval_loss": 0.5213810205459595, + "eval_runtime": 38.3305, + "eval_samples_per_second": 4.2, + "eval_steps_per_second": 0.078, + "eval_wer": 25.700365408038977, + "step": 83000 + }, + { + "epoch": 2.0, + "learning_rate": 3e-06, + "loss": 0.85, + "step": 83050 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.848, + "step": 83100 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8498, + "step": 83150 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8523, + "step": 83200 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8537, + "step": 83250 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8542, + "step": 83300 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8519, + "step": 83350 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.857, + "step": 83400 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8636, + "step": 83450 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8408, + "step": 83500 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8688, + "step": 83550 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8539, + "step": 83600 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8406, + "step": 83650 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8532, + "step": 83700 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.86, + "step": 83750 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8591, + "step": 83800 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8469, + "step": 83850 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8537, + "step": 83900 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.848, + "step": 83950 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8545, + "step": 84000 + }, + { + "epoch": 2.01, + "eval_loss": 0.5214766263961792, + "eval_runtime": 53.964, + "eval_samples_per_second": 2.983, + "eval_steps_per_second": 0.056, + "eval_wer": 28.227771010962243, + "step": 84000 + }, + { + "epoch": 2.01, + "learning_rate": 3e-06, + "loss": 0.8418, + "step": 84050 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8581, + "step": 84100 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.851, + "step": 84150 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8384, + "step": 84200 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8519, + "step": 84250 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.859, + "step": 84300 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8476, + "step": 84350 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8543, + "step": 84400 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8462, + "step": 84450 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8383, + "step": 84500 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8431, + "step": 84550 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.852, + "step": 84600 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8607, + "step": 84650 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8469, + "step": 84700 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8391, + "step": 84750 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8567, + "step": 84800 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8409, + "step": 84850 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8509, + "step": 84900 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8606, + "step": 84950 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8594, + "step": 85000 + }, + { + "epoch": 2.02, + "eval_loss": 0.5186431407928467, + "eval_runtime": 39.735, + "eval_samples_per_second": 4.052, + "eval_steps_per_second": 0.076, + "eval_wer": 25.669914738124238, + "step": 85000 + }, + { + "epoch": 2.02, + "learning_rate": 3e-06, + "loss": 0.8663, + "step": 85050 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8503, + "step": 85100 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.848, + "step": 85150 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8507, + "step": 85200 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8559, + "step": 85250 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8536, + "step": 85300 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8562, + "step": 85350 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.843, + "step": 85400 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8628, + "step": 85450 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8418, + "step": 85500 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8493, + "step": 85550 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8544, + "step": 85600 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8529, + "step": 85650 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8573, + "step": 85700 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8541, + "step": 85750 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8428, + "step": 85800 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8432, + "step": 85850 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8402, + "step": 85900 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8465, + "step": 85950 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.86, + "step": 86000 + }, + { + "epoch": 2.03, + "eval_loss": 0.5196462273597717, + "eval_runtime": 40.0681, + "eval_samples_per_second": 4.018, + "eval_steps_per_second": 0.075, + "eval_wer": 25.57856272838002, + "step": 86000 + }, + { + "epoch": 2.03, + "learning_rate": 3e-06, + "loss": 0.8511, + "step": 86050 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.845, + "step": 86100 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8432, + "step": 86150 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8493, + "step": 86200 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8467, + "step": 86250 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8396, + "step": 86300 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8501, + "step": 86350 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8504, + "step": 86400 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8699, + "step": 86450 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8479, + "step": 86500 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8433, + "step": 86550 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8577, + "step": 86600 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8428, + "step": 86650 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8472, + "step": 86700 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8506, + "step": 86750 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8485, + "step": 86800 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8391, + "step": 86850 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8391, + "step": 86900 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8627, + "step": 86950 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8514, + "step": 87000 + }, + { + "epoch": 2.04, + "eval_loss": 0.5203261971473694, + "eval_runtime": 39.1863, + "eval_samples_per_second": 4.109, + "eval_steps_per_second": 0.077, + "eval_wer": 25.182704019488426, + "step": 87000 + }, + { + "epoch": 2.04, + "learning_rate": 3e-06, + "loss": 0.8551, + "step": 87050 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8441, + "step": 87100 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8528, + "step": 87150 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8619, + "step": 87200 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8434, + "step": 87250 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.846, + "step": 87300 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8567, + "step": 87350 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8502, + "step": 87400 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8531, + "step": 87450 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8515, + "step": 87500 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8344, + "step": 87550 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8511, + "step": 87600 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8474, + "step": 87650 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8474, + "step": 87700 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8569, + "step": 87750 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8505, + "step": 87800 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8571, + "step": 87850 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8469, + "step": 87900 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8564, + "step": 87950 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8505, + "step": 88000 + }, + { + "epoch": 2.05, + "eval_loss": 0.5163885354995728, + "eval_runtime": 50.9215, + "eval_samples_per_second": 3.162, + "eval_steps_per_second": 0.059, + "eval_wer": 28.01461632155907, + "step": 88000 + }, + { + "epoch": 2.05, + "learning_rate": 3e-06, + "loss": 0.8538, + "step": 88050 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8592, + "step": 88100 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.851, + "step": 88150 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8455, + "step": 88200 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8412, + "step": 88250 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8507, + "step": 88300 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8462, + "step": 88350 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8452, + "step": 88400 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8406, + "step": 88450 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8348, + "step": 88500 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8376, + "step": 88550 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8336, + "step": 88600 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.848, + "step": 88650 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8507, + "step": 88700 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8512, + "step": 88750 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8441, + "step": 88800 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8459, + "step": 88850 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8484, + "step": 88900 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8389, + "step": 88950 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8512, + "step": 89000 + }, + { + "epoch": 2.06, + "eval_loss": 0.5173874497413635, + "eval_runtime": 39.69, + "eval_samples_per_second": 4.056, + "eval_steps_per_second": 0.076, + "eval_wer": 25.091352009744217, + "step": 89000 + }, + { + "epoch": 2.06, + "learning_rate": 3e-06, + "loss": 0.8477, + "step": 89050 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8642, + "step": 89100 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8497, + "step": 89150 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8489, + "step": 89200 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8383, + "step": 89250 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8441, + "step": 89300 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8523, + "step": 89350 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.836, + "step": 89400 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8397, + "step": 89450 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8432, + "step": 89500 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8383, + "step": 89550 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8475, + "step": 89600 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8469, + "step": 89650 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8537, + "step": 89700 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8516, + "step": 89750 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8452, + "step": 89800 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.847, + "step": 89850 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8373, + "step": 89900 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8446, + "step": 89950 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8495, + "step": 90000 + }, + { + "epoch": 2.07, + "eval_loss": 0.5141222476959229, + "eval_runtime": 38.4488, + "eval_samples_per_second": 4.187, + "eval_steps_per_second": 0.078, + "eval_wer": 25.54811205846529, + "step": 90000 + }, + { + "epoch": 2.07, + "learning_rate": 3e-06, + "loss": 0.8508, + "step": 90050 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8408, + "step": 90100 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8439, + "step": 90150 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8476, + "step": 90200 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8434, + "step": 90250 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8535, + "step": 90300 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8414, + "step": 90350 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.853, + "step": 90400 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8436, + "step": 90450 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8366, + "step": 90500 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8387, + "step": 90550 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8421, + "step": 90600 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8371, + "step": 90650 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8542, + "step": 90700 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8543, + "step": 90750 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8352, + "step": 90800 + }, + { + "epoch": 2.08, + "learning_rate": 3e-06, + "loss": 0.8383, + "step": 90850 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 0.841, + "step": 90900 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 0.839, + "step": 90950 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 0.8381, + "step": 91000 + }, + { + "epoch": 3.0, + "eval_loss": 0.5129591226577759, + "eval_runtime": 38.5934, + "eval_samples_per_second": 4.172, + "eval_steps_per_second": 0.078, + "eval_wer": 24.96954933008526, + "step": 91000 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 0.828, + "step": 91050 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 0.8258, + "step": 91100 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 0.8379, + "step": 91150 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 0.8315, + "step": 91200 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 0.8401, + "step": 91250 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 0.8444, + "step": 91300 + }, + { + "epoch": 3.0, + "learning_rate": 3e-06, + "loss": 0.843, + "step": 91350 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8325, + "step": 91400 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8416, + "step": 91450 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8468, + "step": 91500 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8376, + "step": 91550 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8431, + "step": 91600 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8307, + "step": 91650 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.83, + "step": 91700 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8404, + "step": 91750 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8429, + "step": 91800 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8328, + "step": 91850 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.83, + "step": 91900 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8256, + "step": 91950 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8253, + "step": 92000 + }, + { + "epoch": 3.01, + "eval_loss": 0.5146694779396057, + "eval_runtime": 35.1807, + "eval_samples_per_second": 4.576, + "eval_steps_per_second": 0.085, + "eval_wer": 25.57856272838002, + "step": 92000 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8357, + "step": 92050 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8417, + "step": 92100 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8426, + "step": 92150 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8331, + "step": 92200 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8215, + "step": 92250 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8274, + "step": 92300 + }, + { + "epoch": 3.01, + "learning_rate": 3e-06, + "loss": 0.8268, + "step": 92350 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8322, + "step": 92400 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.84, + "step": 92450 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8407, + "step": 92500 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8404, + "step": 92550 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8395, + "step": 92600 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8346, + "step": 92650 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8292, + "step": 92700 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8389, + "step": 92750 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8261, + "step": 92800 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8261, + "step": 92850 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8343, + "step": 92900 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8294, + "step": 92950 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8387, + "step": 93000 + }, + { + "epoch": 3.02, + "eval_loss": 0.5168384313583374, + "eval_runtime": 38.8899, + "eval_samples_per_second": 4.14, + "eval_steps_per_second": 0.077, + "eval_wer": 24.908647990255787, + "step": 93000 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8296, + "step": 93050 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8306, + "step": 93100 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8364, + "step": 93150 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8453, + "step": 93200 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8191, + "step": 93250 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8257, + "step": 93300 + }, + { + "epoch": 3.02, + "learning_rate": 3e-06, + "loss": 0.8376, + "step": 93350 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8242, + "step": 93400 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8238, + "step": 93450 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8319, + "step": 93500 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8355, + "step": 93550 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8372, + "step": 93600 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8326, + "step": 93650 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8339, + "step": 93700 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.838, + "step": 93750 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8365, + "step": 93800 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8359, + "step": 93850 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8385, + "step": 93900 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8357, + "step": 93950 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8425, + "step": 94000 + }, + { + "epoch": 3.03, + "eval_loss": 0.5134768486022949, + "eval_runtime": 37.6084, + "eval_samples_per_second": 4.281, + "eval_steps_per_second": 0.08, + "eval_wer": 25.243605359317904, + "step": 94000 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8331, + "step": 94050 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8438, + "step": 94100 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8389, + "step": 94150 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8274, + "step": 94200 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8254, + "step": 94250 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8322, + "step": 94300 + }, + { + "epoch": 3.03, + "learning_rate": 3e-06, + "loss": 0.8471, + "step": 94350 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8358, + "step": 94400 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8337, + "step": 94450 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8381, + "step": 94500 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8384, + "step": 94550 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8286, + "step": 94600 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8332, + "step": 94650 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8362, + "step": 94700 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8306, + "step": 94750 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8367, + "step": 94800 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8349, + "step": 94850 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8286, + "step": 94900 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8249, + "step": 94950 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8339, + "step": 95000 + }, + { + "epoch": 3.04, + "eval_loss": 0.5161515474319458, + "eval_runtime": 40.2582, + "eval_samples_per_second": 3.999, + "eval_steps_per_second": 0.075, + "eval_wer": 25.669914738124238, + "step": 95000 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8428, + "step": 95050 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8315, + "step": 95100 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8442, + "step": 95150 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8305, + "step": 95200 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8305, + "step": 95250 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8485, + "step": 95300 + }, + { + "epoch": 3.04, + "learning_rate": 3e-06, + "loss": 0.8374, + "step": 95350 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8432, + "step": 95400 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8406, + "step": 95450 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8399, + "step": 95500 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8377, + "step": 95550 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8297, + "step": 95600 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8499, + "step": 95650 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8362, + "step": 95700 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8404, + "step": 95750 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8365, + "step": 95800 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8375, + "step": 95850 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.832, + "step": 95900 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8343, + "step": 95950 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8402, + "step": 96000 + }, + { + "epoch": 3.05, + "eval_loss": 0.5146769881248474, + "eval_runtime": 36.7672, + "eval_samples_per_second": 4.379, + "eval_steps_per_second": 0.082, + "eval_wer": 25.730816077953715, + "step": 96000 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8398, + "step": 96050 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8369, + "step": 96100 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8496, + "step": 96150 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8433, + "step": 96200 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8453, + "step": 96250 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8381, + "step": 96300 + }, + { + "epoch": 3.05, + "learning_rate": 3e-06, + "loss": 0.8324, + "step": 96350 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8411, + "step": 96400 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8311, + "step": 96450 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8378, + "step": 96500 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8364, + "step": 96550 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8361, + "step": 96600 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8368, + "step": 96650 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8367, + "step": 96700 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.833, + "step": 96750 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8383, + "step": 96800 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8352, + "step": 96850 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8421, + "step": 96900 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8359, + "step": 96950 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8396, + "step": 97000 + }, + { + "epoch": 3.06, + "eval_loss": 0.5142699480056763, + "eval_runtime": 37.3451, + "eval_samples_per_second": 4.311, + "eval_steps_per_second": 0.08, + "eval_wer": 25.669914738124238, + "step": 97000 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8277, + "step": 97050 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8331, + "step": 97100 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.825, + "step": 97150 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8222, + "step": 97200 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8335, + "step": 97250 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8344, + "step": 97300 + }, + { + "epoch": 3.06, + "learning_rate": 3e-06, + "loss": 0.8327, + "step": 97350 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8256, + "step": 97400 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8365, + "step": 97450 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8245, + "step": 97500 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8234, + "step": 97550 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8386, + "step": 97600 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8338, + "step": 97650 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8403, + "step": 97700 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8465, + "step": 97750 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.836, + "step": 97800 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8236, + "step": 97850 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8387, + "step": 97900 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8309, + "step": 97950 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8432, + "step": 98000 + }, + { + "epoch": 3.07, + "eval_loss": 0.5099794864654541, + "eval_runtime": 38.2718, + "eval_samples_per_second": 4.207, + "eval_steps_per_second": 0.078, + "eval_wer": 24.878197320341048, + "step": 98000 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8324, + "step": 98050 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8328, + "step": 98100 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8281, + "step": 98150 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8282, + "step": 98200 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8376, + "step": 98250 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8453, + "step": 98300 + }, + { + "epoch": 3.07, + "learning_rate": 3e-06, + "loss": 0.8347, + "step": 98350 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.828, + "step": 98400 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.8273, + "step": 98450 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.8421, + "step": 98500 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.8237, + "step": 98550 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.8207, + "step": 98600 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.8481, + "step": 98650 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.8414, + "step": 98700 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.8472, + "step": 98750 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.8257, + "step": 98800 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.8283, + "step": 98850 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.8336, + "step": 98900 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.832, + "step": 98950 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.844, + "step": 99000 + }, + { + "epoch": 3.08, + "eval_loss": 0.5100468397140503, + "eval_runtime": 36.7593, + "eval_samples_per_second": 4.38, + "eval_steps_per_second": 0.082, + "eval_wer": 25.060901339829478, + "step": 99000 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.839, + "step": 99050 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.838, + "step": 99100 + }, + { + "epoch": 3.08, + "learning_rate": 3e-06, + "loss": 0.8347, + "step": 99150 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.8243, + "step": 99200 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.8305, + "step": 99250 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.8251, + "step": 99300 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.8218, + "step": 99350 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.8176, + "step": 99400 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.8345, + "step": 99450 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.8259, + "step": 99500 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.8298, + "step": 99550 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.8183, + "step": 99600 + }, + { + "epoch": 4.0, + "learning_rate": 3e-06, + "loss": 0.83, + "step": 99650 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.8267, + "step": 99700 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.8235, + "step": 99750 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.8223, + "step": 99800 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.8331, + "step": 99850 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.82, + "step": 99900 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.8196, + "step": 99950 + }, + { + "epoch": 4.01, + "learning_rate": 3e-06, + "loss": 0.8333, + "step": 100000 + }, + { + "epoch": 4.01, + "eval_loss": 0.512780487537384, + "eval_runtime": 38.9453, + "eval_samples_per_second": 4.134, + "eval_steps_per_second": 0.077, + "eval_wer": 24.96954933008526, + "step": 100000 + }, + { + "epoch": 4.01, + "step": 100000, + "total_flos": 3.151209909694464e+20, + "train_loss": 0.2913371723175049, + "train_runtime": 623384.3788, + "train_samples_per_second": 41.066, + "train_steps_per_second": 0.16 + } + ], + "max_steps": 100000, + "num_train_epochs": 9223372036854775807, + "total_flos": 3.151209909694464e+20, + "trial_name": null, + "trial_params": null +}