{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 5750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.87, "learning_rate": 3.3949999999999997e-06, "loss": 13.6823, "step": 100 }, { "epoch": 1.74, "learning_rate": 6.895e-06, "loss": 7.5854, "step": 200 }, { "epoch": 2.61, "learning_rate": 1.0394999999999998e-05, "loss": 4.3711, "step": 300 }, { "epoch": 2.61, "eval_loss": 4.312221050262451, "eval_runtime": 80.91, "eval_samples_per_second": 21.011, "eval_steps_per_second": 1.322, "eval_wer": 1.0, "step": 300 }, { "epoch": 3.48, "learning_rate": 1.3895e-05, "loss": 3.8129, "step": 400 }, { "epoch": 4.35, "learning_rate": 1.7395e-05, "loss": 3.4258, "step": 500 }, { "epoch": 5.22, "learning_rate": 2.0894999999999996e-05, "loss": 3.1653, "step": 600 }, { "epoch": 5.22, "eval_loss": 3.115588426589966, "eval_runtime": 81.1622, "eval_samples_per_second": 20.946, "eval_steps_per_second": 1.318, "eval_wer": 1.0, "step": 600 }, { "epoch": 6.09, "learning_rate": 2.4394999999999996e-05, "loss": 3.0356, "step": 700 }, { "epoch": 6.96, "learning_rate": 2.7895e-05, "loss": 2.9791, "step": 800 }, { "epoch": 7.83, "learning_rate": 3.1395e-05, "loss": 2.8904, "step": 900 }, { "epoch": 7.83, "eval_loss": 2.842055320739746, "eval_runtime": 84.1755, "eval_samples_per_second": 20.196, "eval_steps_per_second": 1.271, "eval_wer": 0.9918110836031232, "step": 900 }, { "epoch": 8.7, "learning_rate": 3.4895e-05, "loss": 2.1422, "step": 1000 }, { "epoch": 9.57, "learning_rate": 3.8394999999999994e-05, "loss": 1.2257, "step": 1100 }, { "epoch": 10.43, "learning_rate": 4.1895e-05, "loss": 0.9207, "step": 1200 }, { "epoch": 10.43, "eval_loss": 0.9894591569900513, "eval_runtime": 82.3044, "eval_samples_per_second": 20.655, "eval_steps_per_second": 1.3, "eval_wer": 0.8688503777058338, "step": 1200 }, { "epoch": 11.3, "learning_rate": 4.5394999999999995e-05, "loss": 0.7881, "step": 1300 }, { "epoch": 12.17, "learning_rate": 4.8895e-05, "loss": 0.7047, "step": 1400 }, { "epoch": 13.04, "learning_rate": 5.2395e-05, "loss": 0.6384, "step": 1500 }, { "epoch": 13.04, "eval_loss": 0.6993927359580994, "eval_runtime": 82.4631, "eval_samples_per_second": 20.615, "eval_steps_per_second": 1.298, "eval_wer": 0.7700120611946931, "step": 1500 }, { "epoch": 13.91, "learning_rate": 5.589499999999999e-05, "loss": 0.5989, "step": 1600 }, { "epoch": 14.78, "learning_rate": 5.9394999999999996e-05, "loss": 0.5601, "step": 1700 }, { "epoch": 15.65, "learning_rate": 6.289499999999999e-05, "loss": 0.5215, "step": 1800 }, { "epoch": 15.65, "eval_loss": 0.5627515912055969, "eval_runtime": 81.3944, "eval_samples_per_second": 20.886, "eval_steps_per_second": 1.315, "eval_wer": 0.6443217164984447, "step": 1800 }, { "epoch": 16.52, "learning_rate": 6.639499999999999e-05, "loss": 0.4949, "step": 1900 }, { "epoch": 17.39, "learning_rate": 6.9895e-05, "loss": 0.4869, "step": 2000 }, { "epoch": 18.26, "learning_rate": 6.818933333333333e-05, "loss": 0.4573, "step": 2100 }, { "epoch": 18.26, "eval_loss": 0.5316212177276611, "eval_runtime": 82.2602, "eval_samples_per_second": 20.666, "eval_steps_per_second": 1.301, "eval_wer": 0.6174062083412684, "step": 2100 }, { "epoch": 19.13, "learning_rate": 6.632266666666666e-05, "loss": 0.438, "step": 2200 }, { "epoch": 20.0, "learning_rate": 6.4456e-05, "loss": 0.4153, "step": 2300 }, { "epoch": 20.87, "learning_rate": 6.258933333333333e-05, "loss": 0.3875, "step": 2400 }, { "epoch": 20.87, "eval_loss": 0.4931696653366089, "eval_runtime": 80.8801, "eval_samples_per_second": 21.019, "eval_steps_per_second": 1.323, "eval_wer": 0.5778581857423982, "step": 2400 }, { "epoch": 21.74, "learning_rate": 6.072266666666667e-05, "loss": 0.3807, "step": 2500 }, { "epoch": 22.61, "learning_rate": 5.8855999999999993e-05, "loss": 0.3715, "step": 2600 }, { "epoch": 23.48, "learning_rate": 5.6989333333333333e-05, "loss": 0.3562, "step": 2700 }, { "epoch": 23.48, "eval_loss": 0.4971640110015869, "eval_runtime": 82.278, "eval_samples_per_second": 20.662, "eval_steps_per_second": 1.3, "eval_wer": 0.547514759093506, "step": 2700 }, { "epoch": 24.35, "learning_rate": 5.512266666666666e-05, "loss": 0.3457, "step": 2800 }, { "epoch": 25.22, "learning_rate": 5.3256e-05, "loss": 0.3356, "step": 2900 }, { "epoch": 26.09, "learning_rate": 5.1389333333333326e-05, "loss": 0.3218, "step": 3000 }, { "epoch": 26.09, "eval_loss": 0.4894775450229645, "eval_runtime": 81.4053, "eval_samples_per_second": 20.883, "eval_steps_per_second": 1.314, "eval_wer": 0.5219323303497746, "step": 3000 }, { "epoch": 26.96, "learning_rate": 4.9522666666666666e-05, "loss": 0.3072, "step": 3100 }, { "epoch": 27.83, "learning_rate": 4.765599999999999e-05, "loss": 0.3006, "step": 3200 }, { "epoch": 28.7, "learning_rate": 4.578933333333333e-05, "loss": 0.2954, "step": 3300 }, { "epoch": 28.7, "eval_loss": 0.5226009488105774, "eval_runtime": 82.2645, "eval_samples_per_second": 20.665, "eval_steps_per_second": 1.301, "eval_wer": 0.5192026915508157, "step": 3300 }, { "epoch": 29.57, "learning_rate": 4.392266666666666e-05, "loss": 0.2965, "step": 3400 }, { "epoch": 30.43, "learning_rate": 4.2056e-05, "loss": 0.286, "step": 3500 }, { "epoch": 31.3, "learning_rate": 4.018933333333333e-05, "loss": 0.287, "step": 3600 }, { "epoch": 31.3, "eval_loss": 0.495715469121933, "eval_runtime": 79.9357, "eval_samples_per_second": 21.267, "eval_steps_per_second": 1.339, "eval_wer": 0.5145686535897924, "step": 3600 }, { "epoch": 32.17, "learning_rate": 3.8322666666666665e-05, "loss": 0.2768, "step": 3700 }, { "epoch": 33.04, "learning_rate": 3.6456e-05, "loss": 0.2731, "step": 3800 }, { "epoch": 33.91, "learning_rate": 3.458933333333333e-05, "loss": 0.2587, "step": 3900 }, { "epoch": 33.91, "eval_loss": 0.49437007308006287, "eval_runtime": 82.64, "eval_samples_per_second": 20.571, "eval_steps_per_second": 1.295, "eval_wer": 0.48930362470640515, "step": 3900 }, { "epoch": 34.78, "learning_rate": 3.2722666666666664e-05, "loss": 0.2549, "step": 4000 }, { "epoch": 35.65, "learning_rate": 3.0856e-05, "loss": 0.2538, "step": 4100 }, { "epoch": 36.52, "learning_rate": 2.8989333333333334e-05, "loss": 0.2496, "step": 4200 }, { "epoch": 36.52, "eval_loss": 0.4975605010986328, "eval_runtime": 82.1209, "eval_samples_per_second": 20.701, "eval_steps_per_second": 1.303, "eval_wer": 0.4894940646226116, "step": 4200 }, { "epoch": 37.39, "learning_rate": 2.7122666666666667e-05, "loss": 0.2461, "step": 4300 }, { "epoch": 38.26, "learning_rate": 2.5256e-05, "loss": 0.2359, "step": 4400 }, { "epoch": 39.13, "learning_rate": 2.3389333333333333e-05, "loss": 0.2365, "step": 4500 }, { "epoch": 39.13, "eval_loss": 0.5185123085975647, "eval_runtime": 79.6732, "eval_samples_per_second": 21.337, "eval_steps_per_second": 1.343, "eval_wer": 0.4818764679743541, "step": 4500 }, { "epoch": 40.0, "learning_rate": 2.1522666666666666e-05, "loss": 0.2357, "step": 4600 }, { "epoch": 40.87, "learning_rate": 1.9656e-05, "loss": 0.2289, "step": 4700 }, { "epoch": 41.74, "learning_rate": 1.7789333333333333e-05, "loss": 0.2264, "step": 4800 }, { "epoch": 41.74, "eval_loss": 0.5152125954627991, "eval_runtime": 79.201, "eval_samples_per_second": 21.464, "eval_steps_per_second": 1.351, "eval_wer": 0.47755982987367485, "step": 4800 }, { "epoch": 42.61, "learning_rate": 1.5922666666666666e-05, "loss": 0.2211, "step": 4900 }, { "epoch": 43.48, "learning_rate": 1.4055999999999999e-05, "loss": 0.2186, "step": 5000 }, { "epoch": 44.35, "learning_rate": 1.2189333333333332e-05, "loss": 0.2224, "step": 5100 }, { "epoch": 44.35, "eval_loss": 0.5030579566955566, "eval_runtime": 80.9089, "eval_samples_per_second": 21.011, "eval_steps_per_second": 1.322, "eval_wer": 0.4745762711864407, "step": 5100 }, { "epoch": 45.22, "learning_rate": 1.0322666666666665e-05, "loss": 0.2162, "step": 5200 }, { "epoch": 46.09, "learning_rate": 8.456e-06, "loss": 0.2159, "step": 5300 }, { "epoch": 46.96, "learning_rate": 6.589333333333332e-06, "loss": 0.2096, "step": 5400 }, { "epoch": 46.96, "eval_loss": 0.5061585307121277, "eval_runtime": 81.3005, "eval_samples_per_second": 20.91, "eval_steps_per_second": 1.316, "eval_wer": 0.47076747286231196, "step": 5400 }, { "epoch": 47.83, "learning_rate": 4.7226666666666654e-06, "loss": 0.205, "step": 5500 }, { "epoch": 48.7, "learning_rate": 2.856e-06, "loss": 0.2038, "step": 5600 }, { "epoch": 49.57, "learning_rate": 9.893333333333332e-07, "loss": 0.2038, "step": 5700 }, { "epoch": 49.57, "eval_loss": 0.5217297077178955, "eval_runtime": 83.7172, "eval_samples_per_second": 20.306, "eval_steps_per_second": 1.278, "eval_wer": 0.46981527328127975, "step": 5700 }, { "epoch": 50.0, "step": 5750, "total_flos": 2.9609940258263142e+19, "train_loss": 1.123646092788033, "train_runtime": 13730.8326, "train_samples_per_second": 13.386, "train_steps_per_second": 0.419 } ], "max_steps": 5750, "num_train_epochs": 50, "total_flos": 2.9609940258263142e+19, "trial_name": null, "trial_params": null }