{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 9650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.52, "learning_rate": 3.7125e-06, "loss": 19.5412, "step": 100 }, { "epoch": 1.04, "learning_rate": 7.4625e-06, "loss": 11.0922, "step": 200 }, { "epoch": 1.55, "learning_rate": 1.1212499999999998e-05, "loss": 7.1984, "step": 300 }, { "epoch": 2.07, "learning_rate": 1.49625e-05, "loss": 5.9337, "step": 400 }, { "epoch": 2.59, "learning_rate": 1.8712499999999997e-05, "loss": 4.9733, "step": 500 }, { "epoch": 2.59, "eval_loss": 5.06973934173584, "eval_runtime": 115.8922, "eval_samples_per_second": 23.237, "eval_steps_per_second": 2.908, "eval_wer": 1.0, "step": 500 }, { "epoch": 3.11, "learning_rate": 2.2462499999999997e-05, "loss": 4.2014, "step": 600 }, { "epoch": 3.63, "learning_rate": 2.6212499999999997e-05, "loss": 3.704, "step": 700 }, { "epoch": 4.15, "learning_rate": 2.99625e-05, "loss": 3.5005, "step": 800 }, { "epoch": 4.66, "learning_rate": 3.37125e-05, "loss": 3.4424, "step": 900 }, { "epoch": 5.18, "learning_rate": 3.7462499999999996e-05, "loss": 3.3839, "step": 1000 }, { "epoch": 5.18, "eval_loss": 3.3517656326293945, "eval_runtime": 115.48, "eval_samples_per_second": 23.32, "eval_steps_per_second": 2.918, "eval_wer": 1.0, "step": 1000 }, { "epoch": 5.7, "learning_rate": 4.12125e-05, "loss": 3.3036, "step": 1100 }, { "epoch": 6.22, "learning_rate": 4.4962499999999995e-05, "loss": 3.2565, "step": 1200 }, { "epoch": 6.74, "learning_rate": 4.871249999999999e-05, "loss": 2.9583, "step": 1300 }, { "epoch": 7.25, "learning_rate": 5.2462499999999994e-05, "loss": 2.3824, "step": 1400 }, { "epoch": 7.77, "learning_rate": 5.62125e-05, "loss": 2.0596, "step": 1500 }, { "epoch": 7.77, "eval_loss": 1.3991833925247192, "eval_runtime": 116.0149, "eval_samples_per_second": 23.213, "eval_steps_per_second": 2.905, "eval_wer": 0.7869276218611522, "step": 1500 }, { "epoch": 8.29, "learning_rate": 5.9962499999999994e-05, "loss": 1.9024, "step": 1600 }, { "epoch": 8.81, "learning_rate": 6.37125e-05, "loss": 1.7879, "step": 1700 }, { "epoch": 9.33, "learning_rate": 6.746249999999999e-05, "loss": 1.7072, "step": 1800 }, { "epoch": 9.84, "learning_rate": 7.121249999999999e-05, "loss": 1.6689, "step": 1900 }, { "epoch": 10.36, "learning_rate": 7.49625e-05, "loss": 1.6102, "step": 2000 }, { "epoch": 10.36, "eval_loss": 1.0711737871170044, "eval_runtime": 115.3994, "eval_samples_per_second": 23.336, "eval_steps_per_second": 2.92, "eval_wer": 0.6754062038404727, "step": 2000 }, { "epoch": 10.88, "learning_rate": 7.40392156862745e-05, "loss": 1.5881, "step": 2100 }, { "epoch": 11.4, "learning_rate": 7.305882352941176e-05, "loss": 1.5371, "step": 2200 }, { "epoch": 11.92, "learning_rate": 7.208823529411764e-05, "loss": 1.5062, "step": 2300 }, { "epoch": 12.44, "learning_rate": 7.11078431372549e-05, "loss": 1.4704, "step": 2400 }, { "epoch": 12.95, "learning_rate": 7.012745098039215e-05, "loss": 1.4587, "step": 2500 }, { "epoch": 12.95, "eval_loss": 0.9280298352241516, "eval_runtime": 115.6447, "eval_samples_per_second": 23.287, "eval_steps_per_second": 2.914, "eval_wer": 0.6361244460856721, "step": 2500 }, { "epoch": 13.47, "learning_rate": 6.91470588235294e-05, "loss": 1.4201, "step": 2600 }, { "epoch": 13.99, "learning_rate": 6.81764705882353e-05, "loss": 1.4221, "step": 2700 }, { "epoch": 14.51, "learning_rate": 6.719607843137255e-05, "loss": 1.3937, "step": 2800 }, { "epoch": 15.03, "learning_rate": 6.62156862745098e-05, "loss": 1.3984, "step": 2900 }, { "epoch": 15.54, "learning_rate": 6.523529411764705e-05, "loss": 1.3667, "step": 3000 }, { "epoch": 15.54, "eval_loss": 0.9280989766120911, "eval_runtime": 115.1284, "eval_samples_per_second": 23.391, "eval_steps_per_second": 2.927, "eval_wer": 0.6155372968980798, "step": 3000 }, { "epoch": 16.06, "learning_rate": 6.425490196078431e-05, "loss": 1.367, "step": 3100 }, { "epoch": 16.58, "learning_rate": 6.327450980392156e-05, "loss": 1.3535, "step": 3200 }, { "epoch": 17.1, "learning_rate": 6.229411764705881e-05, "loss": 1.3265, "step": 3300 }, { "epoch": 17.62, "learning_rate": 6.131372549019608e-05, "loss": 1.3202, "step": 3400 }, { "epoch": 18.13, "learning_rate": 6.033333333333333e-05, "loss": 1.3042, "step": 3500 }, { "epoch": 18.13, "eval_loss": 0.9037219882011414, "eval_runtime": 114.717, "eval_samples_per_second": 23.475, "eval_steps_per_second": 2.938, "eval_wer": 0.5921344165435746, "step": 3500 }, { "epoch": 18.65, "learning_rate": 5.9352941176470584e-05, "loss": 1.3016, "step": 3600 }, { "epoch": 19.17, "learning_rate": 5.837254901960784e-05, "loss": 1.2862, "step": 3700 }, { "epoch": 19.69, "learning_rate": 5.739215686274509e-05, "loss": 1.2773, "step": 3800 }, { "epoch": 20.21, "learning_rate": 5.641176470588235e-05, "loss": 1.2543, "step": 3900 }, { "epoch": 20.73, "learning_rate": 5.54313725490196e-05, "loss": 1.2544, "step": 4000 }, { "epoch": 20.73, "eval_loss": 0.8996412754058838, "eval_runtime": 115.5118, "eval_samples_per_second": 23.314, "eval_steps_per_second": 2.917, "eval_wer": 0.5824409158050221, "step": 4000 }, { "epoch": 21.24, "learning_rate": 5.445098039215686e-05, "loss": 1.2419, "step": 4100 }, { "epoch": 21.76, "learning_rate": 5.347058823529411e-05, "loss": 1.2347, "step": 4200 }, { "epoch": 22.28, "learning_rate": 5.2490196078431365e-05, "loss": 1.2373, "step": 4300 }, { "epoch": 22.8, "learning_rate": 5.150980392156863e-05, "loss": 1.2337, "step": 4400 }, { "epoch": 23.32, "learning_rate": 5.052941176470588e-05, "loss": 1.2274, "step": 4500 }, { "epoch": 23.32, "eval_loss": 0.8933804631233215, "eval_runtime": 115.4101, "eval_samples_per_second": 23.334, "eval_steps_per_second": 2.92, "eval_wer": 0.5797175036927622, "step": 4500 }, { "epoch": 23.83, "learning_rate": 4.9549019607843137e-05, "loss": 1.2091, "step": 4600 }, { "epoch": 24.35, "learning_rate": 4.856862745098039e-05, "loss": 1.1947, "step": 4700 }, { "epoch": 24.87, "learning_rate": 4.759803921568627e-05, "loss": 1.1901, "step": 4800 }, { "epoch": 25.39, "learning_rate": 4.6617647058823525e-05, "loss": 1.1731, "step": 4900 }, { "epoch": 25.91, "learning_rate": 4.5637254901960776e-05, "loss": 1.1763, "step": 5000 }, { "epoch": 25.91, "eval_loss": 0.8642701506614685, "eval_runtime": 115.2886, "eval_samples_per_second": 23.359, "eval_steps_per_second": 2.923, "eval_wer": 0.5759785819793205, "step": 5000 }, { "epoch": 26.42, "learning_rate": 4.465686274509803e-05, "loss": 1.1661, "step": 5100 }, { "epoch": 26.94, "learning_rate": 4.367647058823529e-05, "loss": 1.1628, "step": 5200 }, { "epoch": 27.46, "learning_rate": 4.269607843137254e-05, "loss": 1.1618, "step": 5300 }, { "epoch": 27.98, "learning_rate": 4.1715686274509805e-05, "loss": 1.1468, "step": 5400 }, { "epoch": 28.5, "learning_rate": 4.0735294117647055e-05, "loss": 1.149, "step": 5500 }, { "epoch": 28.5, "eval_loss": 0.8251490592956543, "eval_runtime": 116.3124, "eval_samples_per_second": 23.153, "eval_steps_per_second": 2.897, "eval_wer": 0.5543759231905465, "step": 5500 }, { "epoch": 29.02, "learning_rate": 3.975490196078431e-05, "loss": 1.1572, "step": 5600 }, { "epoch": 29.53, "learning_rate": 3.877450980392157e-05, "loss": 1.1389, "step": 5700 }, { "epoch": 30.05, "learning_rate": 3.779411764705882e-05, "loss": 1.1337, "step": 5800 }, { "epoch": 30.57, "learning_rate": 3.681372549019607e-05, "loss": 1.1226, "step": 5900 }, { "epoch": 31.09, "learning_rate": 3.5833333333333335e-05, "loss": 1.1207, "step": 6000 }, { "epoch": 31.09, "eval_loss": 0.8505932092666626, "eval_runtime": 115.2841, "eval_samples_per_second": 23.36, "eval_steps_per_second": 2.923, "eval_wer": 0.5527141802067946, "step": 6000 }, { "epoch": 31.61, "learning_rate": 3.4852941176470585e-05, "loss": 1.1243, "step": 6100 }, { "epoch": 32.12, "learning_rate": 3.387254901960784e-05, "loss": 1.1156, "step": 6200 }, { "epoch": 32.64, "learning_rate": 3.289215686274509e-05, "loss": 1.1158, "step": 6300 }, { "epoch": 33.16, "learning_rate": 3.191176470588235e-05, "loss": 1.0918, "step": 6400 }, { "epoch": 33.68, "learning_rate": 3.09313725490196e-05, "loss": 1.091, "step": 6500 }, { "epoch": 33.68, "eval_loss": 0.8370497822761536, "eval_runtime": 115.5394, "eval_samples_per_second": 23.308, "eval_steps_per_second": 2.917, "eval_wer": 0.5365583456425406, "step": 6500 }, { "epoch": 34.2, "learning_rate": 2.995098039215686e-05, "loss": 1.0892, "step": 6600 }, { "epoch": 34.72, "learning_rate": 2.8980392156862746e-05, "loss": 1.0799, "step": 6700 }, { "epoch": 35.23, "learning_rate": 2.8e-05, "loss": 1.0671, "step": 6800 }, { "epoch": 35.75, "learning_rate": 2.7019607843137253e-05, "loss": 1.07, "step": 6900 }, { "epoch": 36.27, "learning_rate": 2.6049019607843135e-05, "loss": 1.0613, "step": 7000 }, { "epoch": 36.27, "eval_loss": 0.8345041275024414, "eval_runtime": 116.5707, "eval_samples_per_second": 23.102, "eval_steps_per_second": 2.891, "eval_wer": 0.5351735598227474, "step": 7000 }, { "epoch": 36.79, "learning_rate": 2.506862745098039e-05, "loss": 1.0797, "step": 7100 }, { "epoch": 37.31, "learning_rate": 2.4088235294117646e-05, "loss": 1.0787, "step": 7200 }, { "epoch": 37.82, "learning_rate": 2.31078431372549e-05, "loss": 1.0706, "step": 7300 }, { "epoch": 38.34, "learning_rate": 2.2127450980392153e-05, "loss": 1.0585, "step": 7400 }, { "epoch": 38.86, "learning_rate": 2.114705882352941e-05, "loss": 1.0495, "step": 7500 }, { "epoch": 38.86, "eval_loss": 0.8380374908447266, "eval_runtime": 115.7848, "eval_samples_per_second": 23.259, "eval_steps_per_second": 2.911, "eval_wer": 0.5321270310192023, "step": 7500 }, { "epoch": 39.38, "learning_rate": 2.0166666666666664e-05, "loss": 1.0456, "step": 7600 }, { "epoch": 39.9, "learning_rate": 1.9186274509803922e-05, "loss": 1.0446, "step": 7700 }, { "epoch": 40.41, "learning_rate": 1.8205882352941176e-05, "loss": 1.0353, "step": 7800 }, { "epoch": 40.93, "learning_rate": 1.722549019607843e-05, "loss": 1.0375, "step": 7900 }, { "epoch": 41.45, "learning_rate": 1.6245098039215687e-05, "loss": 1.0345, "step": 8000 }, { "epoch": 41.45, "eval_loss": 0.828546941280365, "eval_runtime": 115.0908, "eval_samples_per_second": 23.399, "eval_steps_per_second": 2.928, "eval_wer": 0.5269110044313147, "step": 8000 }, { "epoch": 41.97, "learning_rate": 1.526470588235294e-05, "loss": 1.0196, "step": 8100 }, { "epoch": 42.49, "learning_rate": 1.4284313725490196e-05, "loss": 1.0265, "step": 8200 }, { "epoch": 43.01, "learning_rate": 1.330392156862745e-05, "loss": 1.028, "step": 8300 }, { "epoch": 43.52, "learning_rate": 1.2323529411764704e-05, "loss": 1.0281, "step": 8400 }, { "epoch": 44.04, "learning_rate": 1.1343137254901961e-05, "loss": 1.0297, "step": 8500 }, { "epoch": 44.04, "eval_loss": 0.7836087346076965, "eval_runtime": 116.6272, "eval_samples_per_second": 23.091, "eval_steps_per_second": 2.89, "eval_wer": 0.5141248153618907, "step": 8500 }, { "epoch": 44.56, "learning_rate": 1.0362745098039215e-05, "loss": 1.0097, "step": 8600 }, { "epoch": 45.08, "learning_rate": 9.382352941176469e-06, "loss": 1.0046, "step": 8700 }, { "epoch": 45.6, "learning_rate": 8.401960784313724e-06, "loss": 1.0082, "step": 8800 }, { "epoch": 46.11, "learning_rate": 7.42156862745098e-06, "loss": 1.0065, "step": 8900 }, { "epoch": 46.63, "learning_rate": 6.4411764705882346e-06, "loss": 1.027, "step": 9000 }, { "epoch": 46.63, "eval_loss": 0.8119935989379883, "eval_runtime": 115.6592, "eval_samples_per_second": 23.284, "eval_steps_per_second": 2.914, "eval_wer": 0.5179560561299852, "step": 9000 }, { "epoch": 47.15, "learning_rate": 5.460784313725489e-06, "loss": 0.9917, "step": 9100 }, { "epoch": 47.67, "learning_rate": 4.480392156862745e-06, "loss": 0.9925, "step": 9200 }, { "epoch": 48.19, "learning_rate": 3.5e-06, "loss": 0.9924, "step": 9300 }, { "epoch": 48.7, "learning_rate": 2.5196078431372547e-06, "loss": 0.983, "step": 9400 }, { "epoch": 49.22, "learning_rate": 1.5392156862745098e-06, "loss": 0.9876, "step": 9500 }, { "epoch": 49.22, "eval_loss": 0.8109092116355896, "eval_runtime": 115.9344, "eval_samples_per_second": 23.229, "eval_steps_per_second": 2.907, "eval_wer": 0.5187869276218612, "step": 9500 }, { "epoch": 49.74, "learning_rate": 5.588235294117647e-07, "loss": 0.973, "step": 9600 }, { "epoch": 50.0, "step": 9650, "total_flos": 4.0125393308879946e+19, "train_loss": 1.853262206161578, "train_runtime": 19140.1846, "train_samples_per_second": 16.134, "train_steps_per_second": 0.504 } ], "max_steps": 9650, "num_train_epochs": 50, "total_flos": 4.0125393308879946e+19, "trial_name": null, "trial_params": null }