{ "best_metric": null, "best_model_checkpoint": null, "epoch": 200.3030303030303, "global_step": 9815, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.04, "learning_rate": 8.16743119266055e-06, "loss": 15.5589, "step": 100 }, { "epoch": 4.08, "learning_rate": 1.573623853211009e-05, "loss": 6.7324, "step": 200 }, { "epoch": 6.12, "learning_rate": 2.330504587155963e-05, "loss": 4.9014, "step": 300 }, { "epoch": 8.16, "learning_rate": 3.087385321100917e-05, "loss": 3.9471, "step": 400 }, { "epoch": 8.16, "eval_cer": 1.0, "eval_loss": 3.7109458446502686, "eval_runtime": 120.434, "eval_samples_per_second": 23.573, "eval_steps_per_second": 0.374, "eval_wer": 1.0, "step": 400 }, { "epoch": 10.2, "learning_rate": 3.8442660550458714e-05, "loss": 3.5201, "step": 500 }, { "epoch": 12.24, "learning_rate": 4.601146788990825e-05, "loss": 3.4075, "step": 600 }, { "epoch": 14.28, "learning_rate": 5.35802752293578e-05, "loss": 3.3368, "step": 700 }, { "epoch": 16.32, "learning_rate": 6.114908256880733e-05, "loss": 3.274, "step": 800 }, { "epoch": 16.32, "eval_cer": 0.9572537581332735, "eval_loss": 3.1581757068634033, "eval_runtime": 118.5811, "eval_samples_per_second": 23.941, "eval_steps_per_second": 0.379, "eval_wer": 0.9917300840188272, "step": 800 }, { "epoch": 18.36, "learning_rate": 6.871788990825688e-05, "loss": 2.8673, "step": 900 }, { "epoch": 20.4, "learning_rate": 7.5e-05, "loss": 2.0372, "step": 1000 }, { "epoch": 22.44, "learning_rate": 7.5e-05, "loss": 1.7262, "step": 1100 }, { "epoch": 24.48, "learning_rate": 7.5e-05, "loss": 1.5889, "step": 1200 }, { "epoch": 24.48, "eval_cer": 0.19903073816468478, "eval_loss": 0.7762717604637146, "eval_runtime": 118.6201, "eval_samples_per_second": 23.934, "eval_steps_per_second": 0.379, "eval_wer": 0.6030440329037082, "step": 1200 }, { "epoch": 26.53, "learning_rate": 7.5e-05, "loss": 1.5113, "step": 1300 }, { "epoch": 28.57, "learning_rate": 7.5e-05, "loss": 1.4529, "step": 1400 }, { "epoch": 30.61, "learning_rate": 7.5e-05, "loss": 1.4042, "step": 1500 }, { "epoch": 32.65, "learning_rate": 7.5e-05, "loss": 1.3647, "step": 1600 }, { "epoch": 32.65, "eval_cer": 0.16868745793134396, "eval_loss": 0.605066180229187, "eval_runtime": 119.2577, "eval_samples_per_second": 23.806, "eval_steps_per_second": 0.377, "eval_wer": 0.5134826023841992, "step": 1600 }, { "epoch": 34.69, "learning_rate": 7.5e-05, "loss": 1.3249, "step": 1700 }, { "epoch": 36.73, "learning_rate": 7.5e-05, "loss": 1.3121, "step": 1800 }, { "epoch": 38.77, "learning_rate": 7.5e-05, "loss": 1.2899, "step": 1900 }, { "epoch": 40.81, "learning_rate": 7.5e-05, "loss": 1.2532, "step": 2000 }, { "epoch": 40.81, "eval_cer": 0.15389724029616333, "eval_loss": 0.5423398017883301, "eval_runtime": 118.7297, "eval_samples_per_second": 23.911, "eval_steps_per_second": 0.379, "eval_wer": 0.4711652663528791, "step": 2000 }, { "epoch": 42.85, "learning_rate": 7.5e-05, "loss": 1.2254, "step": 2100 }, { "epoch": 44.89, "learning_rate": 7.5e-05, "loss": 1.2028, "step": 2200 }, { "epoch": 46.93, "learning_rate": 7.5e-05, "loss": 1.1903, "step": 2300 }, { "epoch": 48.97, "learning_rate": 7.5e-05, "loss": 1.1905, "step": 2400 }, { "epoch": 48.97, "eval_cer": 0.1490329818263406, "eval_loss": 0.5179790258407593, "eval_runtime": 119.6696, "eval_samples_per_second": 23.724, "eval_steps_per_second": 0.376, "eval_wer": 0.4532177891171425, "step": 2400 }, { "epoch": 51.02, "learning_rate": 7.5e-05, "loss": 1.1671, "step": 2500 }, { "epoch": 53.06, "learning_rate": 7.5e-05, "loss": 1.1381, "step": 2600 }, { "epoch": 55.1, "learning_rate": 7.5e-05, "loss": 1.131, "step": 2700 }, { "epoch": 57.14, "learning_rate": 7.5e-05, "loss": 1.1193, "step": 2800 }, { "epoch": 57.14, "eval_cer": 0.13925959165357865, "eval_loss": 0.4906373620033264, "eval_runtime": 119.1486, "eval_samples_per_second": 23.827, "eval_steps_per_second": 0.378, "eval_wer": 0.4248009501605595, "step": 2800 }, { "epoch": 59.18, "learning_rate": 7.5e-05, "loss": 1.1008, "step": 2900 }, { "epoch": 61.22, "learning_rate": 7.5e-05, "loss": 1.0784, "step": 3000 }, { "epoch": 63.26, "learning_rate": 7.5e-05, "loss": 1.0678, "step": 3100 }, { "epoch": 65.3, "learning_rate": 7.5e-05, "loss": 1.0584, "step": 3200 }, { "epoch": 65.3, "eval_cer": 0.13315683194974198, "eval_loss": 0.48543456196784973, "eval_runtime": 119.1946, "eval_samples_per_second": 23.818, "eval_steps_per_second": 0.378, "eval_wer": 0.40685347292482293, "step": 3200 }, { "epoch": 67.34, "learning_rate": 7.5e-05, "loss": 1.0434, "step": 3300 }, { "epoch": 69.38, "learning_rate": 7.5e-05, "loss": 1.0332, "step": 3400 }, { "epoch": 71.42, "learning_rate": 7.5e-05, "loss": 1.0198, "step": 3500 }, { "epoch": 73.46, "learning_rate": 7.5e-05, "loss": 1.0095, "step": 3600 }, { "epoch": 73.46, "eval_cer": 0.12868745793134395, "eval_loss": 0.4780372381210327, "eval_runtime": 118.9725, "eval_samples_per_second": 23.863, "eval_steps_per_second": 0.378, "eval_wer": 0.39255707561694453, "step": 3600 }, { "epoch": 75.51, "learning_rate": 7.5e-05, "loss": 0.9989, "step": 3700 }, { "epoch": 77.55, "learning_rate": 7.5e-05, "loss": 0.9924, "step": 3800 }, { "epoch": 79.59, "learning_rate": 7.5e-05, "loss": 0.9878, "step": 3900 }, { "epoch": 81.63, "learning_rate": 7.5e-05, "loss": 0.9759, "step": 4000 }, { "epoch": 81.63, "eval_cer": 0.12686560466681623, "eval_loss": 0.46656644344329834, "eval_runtime": 116.6705, "eval_samples_per_second": 24.333, "eval_steps_per_second": 0.386, "eval_wer": 0.39251308670215107, "step": 4000 }, { "epoch": 83.67, "learning_rate": 7.5e-05, "loss": 0.9659, "step": 4100 }, { "epoch": 85.71, "learning_rate": 7.5e-05, "loss": 0.9531, "step": 4200 }, { "epoch": 87.75, "learning_rate": 7.5e-05, "loss": 0.9452, "step": 4300 }, { "epoch": 89.79, "learning_rate": 7.5e-05, "loss": 0.9593, "step": 4400 }, { "epoch": 89.79, "eval_cer": 0.1247386134170967, "eval_loss": 0.4808085262775421, "eval_runtime": 119.5296, "eval_samples_per_second": 23.751, "eval_steps_per_second": 0.376, "eval_wer": 0.38296749219196763, "step": 4400 }, { "epoch": 91.83, "learning_rate": 7.5e-05, "loss": 0.928, "step": 4500 }, { "epoch": 93.87, "learning_rate": 7.5e-05, "loss": 0.9251, "step": 4600 }, { "epoch": 95.91, "learning_rate": 7.5e-05, "loss": 0.9098, "step": 4700 }, { "epoch": 97.95, "learning_rate": 7.5e-05, "loss": 0.909, "step": 4800 }, { "epoch": 97.95, "eval_cer": 0.1212385012340139, "eval_loss": 0.4797753393650055, "eval_runtime": 116.9311, "eval_samples_per_second": 24.279, "eval_steps_per_second": 0.385, "eval_wer": 0.37645713280253373, "step": 4800 }, { "epoch": 99.99, "learning_rate": 7.5e-05, "loss": 0.8979, "step": 4900 }, { "epoch": 102.04, "learning_rate": 7.370797677261614e-05, "loss": 0.8993, "step": 5000 }, { "epoch": 104.08, "learning_rate": 7.22562652811736e-05, "loss": 0.8888, "step": 5100 }, { "epoch": 106.12, "learning_rate": 7.080455378973104e-05, "loss": 0.8788, "step": 5200 }, { "epoch": 106.12, "eval_cer": 0.11615885124523222, "eval_loss": 0.4906252324581146, "eval_runtime": 119.2237, "eval_samples_per_second": 23.812, "eval_steps_per_second": 0.377, "eval_wer": 0.3608410680508512, "step": 5200 }, { "epoch": 108.16, "learning_rate": 6.93528422982885e-05, "loss": 0.8701, "step": 5300 }, { "epoch": 110.2, "learning_rate": 6.790113080684596e-05, "loss": 0.8621, "step": 5400 }, { "epoch": 112.24, "learning_rate": 6.644941931540342e-05, "loss": 0.8525, "step": 5500 }, { "epoch": 114.28, "learning_rate": 6.499770782396088e-05, "loss": 0.8471, "step": 5600 }, { "epoch": 114.28, "eval_cer": 0.11655373569665695, "eval_loss": 0.4758923053741455, "eval_runtime": 117.4104, "eval_samples_per_second": 24.18, "eval_steps_per_second": 0.383, "eval_wer": 0.360357189988123, "step": 5600 }, { "epoch": 116.32, "learning_rate": 6.354599633251834e-05, "loss": 0.8442, "step": 5700 }, { "epoch": 118.36, "learning_rate": 6.209428484107578e-05, "loss": 0.8244, "step": 5800 }, { "epoch": 120.4, "learning_rate": 6.0642573349633246e-05, "loss": 0.8224, "step": 5900 }, { "epoch": 122.44, "learning_rate": 5.9190861858190705e-05, "loss": 0.8116, "step": 6000 }, { "epoch": 122.44, "eval_cer": 0.11757684541171191, "eval_loss": 0.5079970359802246, "eval_runtime": 118.6326, "eval_samples_per_second": 23.931, "eval_steps_per_second": 0.379, "eval_wer": 0.3627325913869705, "step": 6000 }, { "epoch": 124.48, "learning_rate": 5.773915036674816e-05, "loss": 0.807, "step": 6100 }, { "epoch": 126.53, "learning_rate": 5.6287438875305616e-05, "loss": 0.8011, "step": 6200 }, { "epoch": 128.57, "learning_rate": 5.4835727383863075e-05, "loss": 0.799, "step": 6300 }, { "epoch": 130.61, "learning_rate": 5.3384015892420534e-05, "loss": 0.7881, "step": 6400 }, { "epoch": 130.61, "eval_cer": 0.11347543190486875, "eval_loss": 0.4867645502090454, "eval_runtime": 118.7534, "eval_samples_per_second": 23.907, "eval_steps_per_second": 0.379, "eval_wer": 0.34892007214182025, "step": 6400 }, { "epoch": 132.65, "learning_rate": 5.193230440097799e-05, "loss": 0.7781, "step": 6500 }, { "epoch": 134.69, "learning_rate": 5.048059290953545e-05, "loss": 0.7685, "step": 6600 }, { "epoch": 136.73, "learning_rate": 4.902888141809291e-05, "loss": 0.7654, "step": 6700 }, { "epoch": 138.77, "learning_rate": 4.757716992665037e-05, "loss": 0.766, "step": 6800 }, { "epoch": 138.77, "eval_cer": 0.11357415301772493, "eval_loss": 0.49552106857299805, "eval_runtime": 117.4435, "eval_samples_per_second": 24.173, "eval_steps_per_second": 0.383, "eval_wer": 0.34922799454537456, "step": 6800 }, { "epoch": 140.81, "learning_rate": 4.6139975550122245e-05, "loss": 0.752, "step": 6900 }, { "epoch": 142.85, "learning_rate": 4.4688264058679704e-05, "loss": 0.7451, "step": 7000 }, { "epoch": 144.89, "learning_rate": 4.323655256723716e-05, "loss": 0.7398, "step": 7100 }, { "epoch": 146.93, "learning_rate": 4.178484107579462e-05, "loss": 0.7333, "step": 7200 }, { "epoch": 146.93, "eval_cer": 0.11247027148306035, "eval_loss": 0.5019353032112122, "eval_runtime": 118.907, "eval_samples_per_second": 23.876, "eval_steps_per_second": 0.378, "eval_wer": 0.3461487705098315, "step": 7200 }, { "epoch": 148.97, "learning_rate": 4.033312958435208e-05, "loss": 0.7255, "step": 7300 }, { "epoch": 151.02, "learning_rate": 3.888141809290954e-05, "loss": 0.7283, "step": 7400 }, { "epoch": 153.06, "learning_rate": 3.742970660146699e-05, "loss": 0.7149, "step": 7500 }, { "epoch": 155.1, "learning_rate": 3.597799511002445e-05, "loss": 0.709, "step": 7600 }, { "epoch": 155.1, "eval_cer": 0.11168050258021091, "eval_loss": 0.5083914995193481, "eval_runtime": 120.383, "eval_samples_per_second": 23.583, "eval_steps_per_second": 0.374, "eval_wer": 0.3468086042317336, "step": 7600 }, { "epoch": 157.14, "learning_rate": 3.452628361858191e-05, "loss": 0.7084, "step": 7700 }, { "epoch": 159.18, "learning_rate": 3.307457212713937e-05, "loss": 0.704, "step": 7800 }, { "epoch": 161.22, "learning_rate": 3.1622860635696827e-05, "loss": 0.698, "step": 7900 }, { "epoch": 163.26, "learning_rate": 3.0171149144254275e-05, "loss": 0.6911, "step": 8000 }, { "epoch": 163.26, "eval_cer": 0.11064841821853265, "eval_loss": 0.5144167542457581, "eval_runtime": 120.6954, "eval_samples_per_second": 23.522, "eval_steps_per_second": 0.373, "eval_wer": 0.34122201205296265, "step": 8000 }, { "epoch": 165.3, "learning_rate": 2.8719437652811734e-05, "loss": 0.6889, "step": 8100 }, { "epoch": 167.34, "learning_rate": 2.726772616136919e-05, "loss": 0.6863, "step": 8200 }, { "epoch": 169.38, "learning_rate": 2.581601466992665e-05, "loss": 0.6825, "step": 8300 }, { "epoch": 171.42, "learning_rate": 2.4364303178484107e-05, "loss": 0.6683, "step": 8400 }, { "epoch": 171.42, "eval_cer": 0.11165357864034103, "eval_loss": 0.5218909382820129, "eval_runtime": 119.5764, "eval_samples_per_second": 23.742, "eval_steps_per_second": 0.376, "eval_wer": 0.34091408964940834, "step": 8400 }, { "epoch": 173.46, "learning_rate": 2.2912591687041566e-05, "loss": 0.6627, "step": 8500 }, { "epoch": 175.51, "learning_rate": 2.1460880195599022e-05, "loss": 0.6655, "step": 8600 }, { "epoch": 177.55, "learning_rate": 2.000916870415648e-05, "loss": 0.6662, "step": 8700 }, { "epoch": 179.59, "learning_rate": 1.855745721271394e-05, "loss": 0.659, "step": 8800 }, { "epoch": 179.59, "eval_cer": 0.1095983845636078, "eval_loss": 0.5229596495628357, "eval_runtime": 119.167, "eval_samples_per_second": 23.824, "eval_steps_per_second": 0.378, "eval_wer": 0.3375709321251045, "step": 8800 }, { "epoch": 181.63, "learning_rate": 1.7105745721271395e-05, "loss": 0.6526, "step": 8900 }, { "epoch": 183.67, "learning_rate": 1.5654034229828854e-05, "loss": 0.654, "step": 9000 }, { "epoch": 185.71, "learning_rate": 1.4202322738386313e-05, "loss": 0.6481, "step": 9100 }, { "epoch": 187.75, "learning_rate": 1.275061124694377e-05, "loss": 0.6475, "step": 9200 }, { "epoch": 187.75, "eval_cer": 0.10967915638321742, "eval_loss": 0.5228886604309082, "eval_runtime": 118.4312, "eval_samples_per_second": 23.972, "eval_steps_per_second": 0.38, "eval_wer": 0.3397703778647781, "step": 9200 }, { "epoch": 189.79, "learning_rate": 1.1298899755501229e-05, "loss": 0.6476, "step": 9300 }, { "epoch": 191.83, "learning_rate": 9.847188264058686e-06, "loss": 0.6485, "step": 9400 }, { "epoch": 193.87, "learning_rate": 8.395476772616145e-06, "loss": 0.6449, "step": 9500 }, { "epoch": 195.91, "learning_rate": 6.943765281173602e-06, "loss": 0.6419, "step": 9600 }, { "epoch": 195.91, "eval_cer": 0.10843168050258022, "eval_loss": 0.520015299320221, "eval_runtime": 118.8519, "eval_samples_per_second": 23.887, "eval_steps_per_second": 0.379, "eval_wer": 0.33369990762327895, "step": 9600 }, { "epoch": 197.95, "learning_rate": 5.4920537897310516e-06, "loss": 0.627, "step": 9700 }, { "epoch": 199.99, "learning_rate": 4.054859413202941e-06, "loss": 0.6301, "step": 9800 }, { "epoch": 200.3, "step": 9815, "total_flos": 1.72528931544575e+20, "train_loss": 1.3263086397673363, "train_runtime": 56983.3895, "train_samples_per_second": 22.047, "train_steps_per_second": 0.172 } ], "max_steps": 9815, "num_train_epochs": 201, "total_flos": 1.72528931544575e+20, "trial_name": null, "trial_params": null }