{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 10600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.47, "learning_rate": 3.4299999999999998e-06, "loss": 11.8059, "step": 100 }, { "epoch": 0.94, "learning_rate": 6.93e-06, "loss": 6.741, "step": 200 }, { "epoch": 1.42, "learning_rate": 1.0429999999999998e-05, "loss": 4.6303, "step": 300 }, { "epoch": 1.89, "learning_rate": 1.3929999999999999e-05, "loss": 4.0577, "step": 400 }, { "epoch": 2.36, "learning_rate": 1.7429999999999997e-05, "loss": 3.6055, "step": 500 }, { "epoch": 2.83, "learning_rate": 2.0929999999999998e-05, "loss": 3.2771, "step": 600 }, { "epoch": 3.3, "learning_rate": 2.4429999999999995e-05, "loss": 3.0775, "step": 700 }, { "epoch": 3.77, "learning_rate": 2.793e-05, "loss": 2.9768, "step": 800 }, { "epoch": 4.25, "learning_rate": 3.1429999999999996e-05, "loss": 2.8612, "step": 900 }, { "epoch": 4.72, "learning_rate": 3.493e-05, "loss": 2.8112, "step": 1000 }, { "epoch": 4.72, "eval_loss": 2.778607130050659, "eval_runtime": 101.5624, "eval_samples_per_second": 29.913, "eval_steps_per_second": 0.935, "eval_wer": 1.0, "step": 1000 }, { "epoch": 5.19, "learning_rate": 3.843e-05, "loss": 2.7937, "step": 1100 }, { "epoch": 5.66, "learning_rate": 4.192999999999999e-05, "loss": 2.7598, "step": 1200 }, { "epoch": 6.13, "learning_rate": 4.543e-05, "loss": 2.5782, "step": 1300 }, { "epoch": 6.6, "learning_rate": 4.8929999999999994e-05, "loss": 2.0779, "step": 1400 }, { "epoch": 7.08, "learning_rate": 5.243e-05, "loss": 1.7522, "step": 1500 }, { "epoch": 7.55, "learning_rate": 5.593e-05, "loss": 1.6003, "step": 1600 }, { "epoch": 8.02, "learning_rate": 5.942999999999999e-05, "loss": 1.534, "step": 1700 }, { "epoch": 8.49, "learning_rate": 6.293e-05, "loss": 1.4587, "step": 1800 }, { "epoch": 8.96, "learning_rate": 6.642999999999999e-05, "loss": 1.4313, "step": 1900 }, { "epoch": 9.43, "learning_rate": 6.992999999999999e-05, "loss": 1.3803, "step": 2000 }, { "epoch": 9.43, "eval_loss": 0.49710744619369507, "eval_runtime": 101.1307, "eval_samples_per_second": 30.04, "eval_steps_per_second": 0.939, "eval_wer": 0.5787768779670484, "step": 2000 }, { "epoch": 9.91, "learning_rate": 6.920232558139535e-05, "loss": 1.3528, "step": 2100 }, { "epoch": 10.38, "learning_rate": 6.838837209302325e-05, "loss": 1.3382, "step": 2200 }, { "epoch": 10.85, "learning_rate": 6.757441860465116e-05, "loss": 1.3103, "step": 2300 }, { "epoch": 11.32, "learning_rate": 6.676046511627906e-05, "loss": 1.2818, "step": 2400 }, { "epoch": 11.79, "learning_rate": 6.594651162790697e-05, "loss": 1.265, "step": 2500 }, { "epoch": 12.26, "learning_rate": 6.513255813953489e-05, "loss": 1.2608, "step": 2600 }, { "epoch": 12.74, "learning_rate": 6.431860465116279e-05, "loss": 1.2263, "step": 2700 }, { "epoch": 13.21, "learning_rate": 6.350465116279069e-05, "loss": 1.2069, "step": 2800 }, { "epoch": 13.68, "learning_rate": 6.26906976744186e-05, "loss": 1.2102, "step": 2900 }, { "epoch": 14.15, "learning_rate": 6.18767441860465e-05, "loss": 1.1894, "step": 3000 }, { "epoch": 14.15, "eval_loss": 0.36050862073898315, "eval_runtime": 100.2529, "eval_samples_per_second": 30.303, "eval_steps_per_second": 0.948, "eval_wer": 0.44417760402122314, "step": 3000 }, { "epoch": 14.62, "learning_rate": 6.106279069767441e-05, "loss": 1.1845, "step": 3100 }, { "epoch": 15.09, "learning_rate": 6.024883720930232e-05, "loss": 1.1819, "step": 3200 }, { "epoch": 15.57, "learning_rate": 5.943488372093023e-05, "loss": 1.1565, "step": 3300 }, { "epoch": 16.04, "learning_rate": 5.8620930232558136e-05, "loss": 1.1506, "step": 3400 }, { "epoch": 16.51, "learning_rate": 5.7806976744186036e-05, "loss": 1.1254, "step": 3500 }, { "epoch": 16.98, "learning_rate": 5.699302325581395e-05, "loss": 1.1444, "step": 3600 }, { "epoch": 17.45, "learning_rate": 5.6179069767441856e-05, "loss": 1.1189, "step": 3700 }, { "epoch": 17.92, "learning_rate": 5.536511627906976e-05, "loss": 1.1219, "step": 3800 }, { "epoch": 18.4, "learning_rate": 5.455116279069767e-05, "loss": 1.1075, "step": 3900 }, { "epoch": 18.87, "learning_rate": 5.373720930232558e-05, "loss": 1.1019, "step": 4000 }, { "epoch": 18.87, "eval_loss": 0.3251776695251465, "eval_runtime": 100.5327, "eval_samples_per_second": 30.219, "eval_steps_per_second": 0.945, "eval_wer": 0.42356883552080427, "step": 4000 }, { "epoch": 19.34, "learning_rate": 5.292325581395348e-05, "loss": 1.1104, "step": 4100 }, { "epoch": 19.81, "learning_rate": 5.212558139534883e-05, "loss": 1.0985, "step": 4200 }, { "epoch": 20.28, "learning_rate": 5.1311627906976736e-05, "loss": 1.0878, "step": 4300 }, { "epoch": 20.75, "learning_rate": 5.049767441860465e-05, "loss": 1.0766, "step": 4400 }, { "epoch": 21.23, "learning_rate": 4.968372093023256e-05, "loss": 1.071, "step": 4500 }, { "epoch": 21.7, "learning_rate": 4.8869767441860464e-05, "loss": 1.0508, "step": 4600 }, { "epoch": 22.17, "learning_rate": 4.8055813953488364e-05, "loss": 1.0644, "step": 4700 }, { "epoch": 22.64, "learning_rate": 4.7249999999999997e-05, "loss": 1.0571, "step": 4800 }, { "epoch": 23.11, "learning_rate": 4.6436046511627903e-05, "loss": 1.0457, "step": 4900 }, { "epoch": 23.58, "learning_rate": 4.562209302325581e-05, "loss": 1.0366, "step": 5000 }, { "epoch": 23.58, "eval_loss": 0.31269633769989014, "eval_runtime": 101.0057, "eval_samples_per_second": 30.078, "eval_steps_per_second": 0.941, "eval_wer": 0.40234571348785253, "step": 5000 }, { "epoch": 24.06, "learning_rate": 4.480813953488372e-05, "loss": 1.0395, "step": 5100 }, { "epoch": 24.53, "learning_rate": 4.3994186046511624e-05, "loss": 1.0344, "step": 5200 }, { "epoch": 25.0, "learning_rate": 4.318023255813954e-05, "loss": 1.0318, "step": 5300 }, { "epoch": 25.47, "learning_rate": 4.236627906976744e-05, "loss": 1.0291, "step": 5400 }, { "epoch": 25.94, "learning_rate": 4.1552325581395345e-05, "loss": 1.0325, "step": 5500 }, { "epoch": 26.42, "learning_rate": 4.073837209302325e-05, "loss": 1.0205, "step": 5600 }, { "epoch": 26.89, "learning_rate": 3.992441860465116e-05, "loss": 1.0276, "step": 5700 }, { "epoch": 27.36, "learning_rate": 3.9110465116279065e-05, "loss": 1.0122, "step": 5800 }, { "epoch": 27.83, "learning_rate": 3.829651162790698e-05, "loss": 1.0054, "step": 5900 }, { "epoch": 28.3, "learning_rate": 3.748255813953488e-05, "loss": 1.0217, "step": 6000 }, { "epoch": 28.3, "eval_loss": 0.3026880621910095, "eval_runtime": 100.3358, "eval_samples_per_second": 30.278, "eval_steps_per_second": 0.947, "eval_wer": 0.39525272270315553, "step": 6000 }, { "epoch": 28.77, "learning_rate": 3.6668604651162786e-05, "loss": 1.0134, "step": 6100 }, { "epoch": 29.25, "learning_rate": 3.585465116279069e-05, "loss": 0.9899, "step": 6200 }, { "epoch": 29.72, "learning_rate": 3.50406976744186e-05, "loss": 0.9984, "step": 6300 }, { "epoch": 30.19, "learning_rate": 3.422674418604651e-05, "loss": 0.9952, "step": 6400 }, { "epoch": 30.66, "learning_rate": 3.341279069767442e-05, "loss": 0.9957, "step": 6500 }, { "epoch": 31.13, "learning_rate": 3.259883720930232e-05, "loss": 0.9922, "step": 6600 }, { "epoch": 31.6, "learning_rate": 3.178488372093023e-05, "loss": 0.9707, "step": 6700 }, { "epoch": 32.08, "learning_rate": 3.097093023255814e-05, "loss": 0.9876, "step": 6800 }, { "epoch": 32.55, "learning_rate": 3.0156976744186045e-05, "loss": 0.9701, "step": 6900 }, { "epoch": 33.02, "learning_rate": 2.9343023255813948e-05, "loss": 0.9774, "step": 7000 }, { "epoch": 33.02, "eval_loss": 0.2924236059188843, "eval_runtime": 101.009, "eval_samples_per_second": 30.077, "eval_steps_per_second": 0.941, "eval_wer": 0.3965372800893605, "step": 7000 }, { "epoch": 33.49, "learning_rate": 2.852906976744186e-05, "loss": 0.9745, "step": 7100 }, { "epoch": 33.96, "learning_rate": 2.7715116279069765e-05, "loss": 0.9682, "step": 7200 }, { "epoch": 34.43, "learning_rate": 2.6901162790697676e-05, "loss": 0.9593, "step": 7300 }, { "epoch": 34.91, "learning_rate": 2.608720930232558e-05, "loss": 0.9591, "step": 7400 }, { "epoch": 35.38, "learning_rate": 2.5273255813953486e-05, "loss": 0.9582, "step": 7500 }, { "epoch": 35.85, "learning_rate": 2.4459302325581396e-05, "loss": 0.9567, "step": 7600 }, { "epoch": 36.32, "learning_rate": 2.36453488372093e-05, "loss": 0.9476, "step": 7700 }, { "epoch": 36.79, "learning_rate": 2.2831395348837207e-05, "loss": 0.9534, "step": 7800 }, { "epoch": 37.26, "learning_rate": 2.2017441860465117e-05, "loss": 0.9473, "step": 7900 }, { "epoch": 37.74, "learning_rate": 2.120348837209302e-05, "loss": 0.9485, "step": 8000 }, { "epoch": 37.74, "eval_loss": 0.2948923110961914, "eval_runtime": 99.2137, "eval_samples_per_second": 30.621, "eval_steps_per_second": 0.958, "eval_wer": 0.3755375593409662, "step": 8000 }, { "epoch": 38.21, "learning_rate": 2.0389534883720927e-05, "loss": 0.9344, "step": 8100 }, { "epoch": 38.68, "learning_rate": 1.9575581395348838e-05, "loss": 0.9357, "step": 8200 }, { "epoch": 39.15, "learning_rate": 1.876162790697674e-05, "loss": 0.9545, "step": 8300 }, { "epoch": 39.62, "learning_rate": 1.794767441860465e-05, "loss": 0.9289, "step": 8400 }, { "epoch": 40.09, "learning_rate": 1.7133720930232558e-05, "loss": 0.9434, "step": 8500 }, { "epoch": 40.57, "learning_rate": 1.6319767441860465e-05, "loss": 0.9319, "step": 8600 }, { "epoch": 41.04, "learning_rate": 1.551395348837209e-05, "loss": 0.9357, "step": 8700 }, { "epoch": 41.51, "learning_rate": 1.4699999999999998e-05, "loss": 0.9105, "step": 8800 }, { "epoch": 41.98, "learning_rate": 1.3886046511627905e-05, "loss": 0.9299, "step": 8900 }, { "epoch": 42.45, "learning_rate": 1.3072093023255814e-05, "loss": 0.9332, "step": 9000 }, { "epoch": 42.45, "eval_loss": 0.28152021765708923, "eval_runtime": 100.0017, "eval_samples_per_second": 30.379, "eval_steps_per_second": 0.95, "eval_wer": 0.3664898073163921, "step": 9000 }, { "epoch": 42.92, "learning_rate": 1.225813953488372e-05, "loss": 0.922, "step": 9100 }, { "epoch": 43.4, "learning_rate": 1.1444186046511626e-05, "loss": 0.9161, "step": 9200 }, { "epoch": 43.87, "learning_rate": 1.0630232558139534e-05, "loss": 0.9203, "step": 9300 }, { "epoch": 44.34, "learning_rate": 9.816279069767441e-06, "loss": 0.9221, "step": 9400 }, { "epoch": 44.81, "learning_rate": 9.002325581395346e-06, "loss": 0.8945, "step": 9500 }, { "epoch": 45.28, "learning_rate": 8.188372093023255e-06, "loss": 0.9347, "step": 9600 }, { "epoch": 45.75, "learning_rate": 7.374418604651162e-06, "loss": 0.8967, "step": 9700 }, { "epoch": 46.23, "learning_rate": 6.560465116279069e-06, "loss": 0.9211, "step": 9800 }, { "epoch": 46.7, "learning_rate": 5.7465116279069765e-06, "loss": 0.9007, "step": 9900 }, { "epoch": 47.17, "learning_rate": 4.9325581395348825e-06, "loss": 0.9093, "step": 10000 }, { "epoch": 47.17, "eval_loss": 0.2779529094696045, "eval_runtime": 102.291, "eval_samples_per_second": 29.7, "eval_steps_per_second": 0.929, "eval_wer": 0.3623010332309411, "step": 10000 }, { "epoch": 47.64, "learning_rate": 4.11860465116279e-06, "loss": 0.9066, "step": 10100 }, { "epoch": 48.11, "learning_rate": 3.3046511627906976e-06, "loss": 0.9154, "step": 10200 }, { "epoch": 48.58, "learning_rate": 2.4906976744186045e-06, "loss": 0.9112, "step": 10300 }, { "epoch": 49.06, "learning_rate": 1.6767441860465114e-06, "loss": 0.8968, "step": 10400 }, { "epoch": 49.53, "learning_rate": 8.627906976744186e-07, "loss": 0.9083, "step": 10500 }, { "epoch": 50.0, "learning_rate": 5.697674418604651e-08, "loss": 0.9076, "step": 10600 }, { "epoch": 50.0, "step": 10600, "total_flos": 3.899540035775118e+19, "train_loss": 1.4489057677646853, "train_runtime": 15709.6404, "train_samples_per_second": 21.544, "train_steps_per_second": 0.675 } ], "max_steps": 10600, "num_train_epochs": 50, "total_flos": 3.899540035775118e+19, "trial_name": null, "trial_params": null }