{ "best_metric": null, "best_model_checkpoint": null, "epoch": 256.4102564102564, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.41, "learning_rate": 0.0005, "loss": 1.9105, "step": 500 }, { "epoch": 6.41, "eval_loss": 0.1622474640607834, "eval_runtime": 72.1023, "eval_samples_per_second": 25.755, "eval_steps_per_second": 1.082, "eval_wer": 0.15307930881701373, "step": 500 }, { "epoch": 12.82, "learning_rate": 0.0004871794871794872, "loss": 0.1119, "step": 1000 }, { "epoch": 12.82, "eval_loss": 0.09705421328544617, "eval_runtime": 74.0065, "eval_samples_per_second": 25.092, "eval_steps_per_second": 1.054, "eval_wer": 0.09361984935755427, "step": 1000 }, { "epoch": 19.23, "learning_rate": 0.00047435897435897434, "loss": 0.0614, "step": 1500 }, { "epoch": 19.23, "eval_loss": 0.10023126006126404, "eval_runtime": 74.336, "eval_samples_per_second": 24.981, "eval_steps_per_second": 1.049, "eval_wer": 0.09831634913602126, "step": 1500 }, { "epoch": 25.64, "learning_rate": 0.0004615384615384616, "loss": 0.044, "step": 2000 }, { "epoch": 25.64, "eval_loss": 0.10109349340200424, "eval_runtime": 68.958, "eval_samples_per_second": 26.929, "eval_steps_per_second": 1.131, "eval_wer": 0.09286663712893221, "step": 2000 }, { "epoch": 32.05, "learning_rate": 0.0004487179487179487, "loss": 0.0366, "step": 2500 }, { "epoch": 32.05, "eval_loss": 0.09319411963224411, "eval_runtime": 67.9099, "eval_samples_per_second": 27.345, "eval_steps_per_second": 1.149, "eval_wer": 0.08280903854674347, "step": 2500 }, { "epoch": 38.46, "learning_rate": 0.0004358974358974359, "loss": 0.0315, "step": 3000 }, { "epoch": 38.46, "eval_loss": 0.09261373430490494, "eval_runtime": 68.2657, "eval_samples_per_second": 27.203, "eval_steps_per_second": 1.143, "eval_wer": 0.08803721754541427, "step": 3000 }, { "epoch": 44.87, "learning_rate": 0.0004230769230769231, "loss": 0.0297, "step": 3500 }, { "epoch": 44.87, "eval_loss": 0.09722220152616501, "eval_runtime": 69.3764, "eval_samples_per_second": 26.767, "eval_steps_per_second": 1.124, "eval_wer": 0.08821444395214886, "step": 3500 }, { "epoch": 51.28, "learning_rate": 0.00041025641025641023, "loss": 0.0216, "step": 4000 }, { "epoch": 51.28, "eval_loss": 0.09113188087940216, "eval_runtime": 68.0225, "eval_samples_per_second": 27.3, "eval_steps_per_second": 1.147, "eval_wer": 0.07735932653965441, "step": 4000 }, { "epoch": 57.69, "learning_rate": 0.0003974358974358974, "loss": 0.0211, "step": 4500 }, { "epoch": 57.69, "eval_loss": 0.09818430244922638, "eval_runtime": 70.5517, "eval_samples_per_second": 26.321, "eval_steps_per_second": 1.106, "eval_wer": 0.08910057598582188, "step": 4500 }, { "epoch": 64.1, "learning_rate": 0.00038461538461538467, "loss": 0.0187, "step": 5000 }, { "epoch": 64.1, "eval_loss": 0.10086847096681595, "eval_runtime": 68.8313, "eval_samples_per_second": 26.979, "eval_steps_per_second": 1.133, "eval_wer": 0.08626495347806823, "step": 5000 }, { "epoch": 70.51, "learning_rate": 0.0003717948717948718, "loss": 0.02, "step": 5500 }, { "epoch": 70.51, "eval_loss": 0.09532515704631805, "eval_runtime": 65.875, "eval_samples_per_second": 28.19, "eval_steps_per_second": 1.184, "eval_wer": 0.08520159503766062, "step": 5500 }, { "epoch": 76.92, "learning_rate": 0.000358974358974359, "loss": 0.0163, "step": 6000 }, { "epoch": 76.92, "eval_loss": 0.1028498187661171, "eval_runtime": 62.6651, "eval_samples_per_second": 29.634, "eval_steps_per_second": 1.245, "eval_wer": 0.08041648205582631, "step": 6000 }, { "epoch": 83.33, "learning_rate": 0.00034615384615384613, "loss": 0.0128, "step": 6500 }, { "epoch": 83.33, "eval_loss": 0.09295257925987244, "eval_runtime": 68.7425, "eval_samples_per_second": 27.014, "eval_steps_per_second": 1.135, "eval_wer": 0.08555604785112982, "step": 6500 }, { "epoch": 89.74, "learning_rate": 0.0003333333333333333, "loss": 0.0127, "step": 7000 }, { "epoch": 89.74, "eval_loss": 0.08917286247015, "eval_runtime": 65.0895, "eval_samples_per_second": 28.53, "eval_steps_per_second": 1.198, "eval_wer": 0.06756756756756757, "step": 7000 }, { "epoch": 96.15, "learning_rate": 0.00032051282051282057, "loss": 0.0116, "step": 7500 }, { "epoch": 96.15, "eval_loss": 0.08566667139530182, "eval_runtime": 62.4843, "eval_samples_per_second": 29.719, "eval_steps_per_second": 1.248, "eval_wer": 0.07527691626052282, "step": 7500 }, { "epoch": 102.56, "learning_rate": 0.0003076923076923077, "loss": 0.0139, "step": 8000 }, { "epoch": 102.56, "eval_loss": 0.10782884061336517, "eval_runtime": 62.9655, "eval_samples_per_second": 29.492, "eval_steps_per_second": 1.239, "eval_wer": 0.04811696942844484, "step": 8000 }, { "epoch": 108.97, "learning_rate": 0.0002948717948717949, "loss": 0.0107, "step": 8500 }, { "epoch": 108.97, "eval_loss": 0.09546608477830887, "eval_runtime": 64.1669, "eval_samples_per_second": 28.94, "eval_steps_per_second": 1.216, "eval_wer": 0.06827647319450598, "step": 8500 }, { "epoch": 115.38, "learning_rate": 0.0002820769230769231, "loss": 0.0096, "step": 9000 }, { "epoch": 115.38, "eval_loss": 0.08463115245103836, "eval_runtime": 63.761, "eval_samples_per_second": 29.124, "eval_steps_per_second": 1.223, "eval_wer": 0.06973859105006645, "step": 9000 }, { "epoch": 121.79, "learning_rate": 0.0002692564102564103, "loss": 0.0089, "step": 9500 }, { "epoch": 121.79, "eval_loss": 0.08535169810056686, "eval_runtime": 63.7326, "eval_samples_per_second": 29.137, "eval_steps_per_second": 1.224, "eval_wer": 0.06752326096588392, "step": 9500 }, { "epoch": 128.21, "learning_rate": 0.00025643589743589747, "loss": 0.0084, "step": 10000 }, { "epoch": 128.21, "eval_loss": 0.08750651776790619, "eval_runtime": 61.61, "eval_samples_per_second": 30.141, "eval_steps_per_second": 1.266, "eval_wer": 0.07793531236154187, "step": 10000 }, { "epoch": 134.62, "learning_rate": 0.00024361538461538463, "loss": 0.0074, "step": 10500 }, { "epoch": 134.62, "eval_loss": 0.08403545618057251, "eval_runtime": 60.7541, "eval_samples_per_second": 30.566, "eval_steps_per_second": 1.284, "eval_wer": 0.0770048737261852, "step": 10500 }, { "epoch": 141.03, "learning_rate": 0.00023082051282051282, "loss": 0.0061, "step": 11000 }, { "epoch": 141.03, "eval_loss": 0.09034867584705353, "eval_runtime": 63.3565, "eval_samples_per_second": 29.31, "eval_steps_per_second": 1.231, "eval_wer": 0.07540983606557378, "step": 11000 }, { "epoch": 147.44, "learning_rate": 0.000218, "loss": 0.0076, "step": 11500 }, { "epoch": 147.44, "eval_loss": 0.08722745627164841, "eval_runtime": 63.711, "eval_samples_per_second": 29.147, "eval_steps_per_second": 1.224, "eval_wer": 0.0769162605228179, "step": 11500 }, { "epoch": 153.85, "learning_rate": 0.00020517948717948718, "loss": 0.0069, "step": 12000 }, { "epoch": 153.85, "eval_loss": 0.08911967277526855, "eval_runtime": 61.1855, "eval_samples_per_second": 30.35, "eval_steps_per_second": 1.275, "eval_wer": 0.07722640673460346, "step": 12000 }, { "epoch": 160.26, "learning_rate": 0.00019235897435897437, "loss": 0.0061, "step": 12500 }, { "epoch": 160.26, "eval_loss": 0.09709189832210541, "eval_runtime": 63.0238, "eval_samples_per_second": 29.465, "eval_steps_per_second": 1.238, "eval_wer": 0.07735932653965441, "step": 12500 }, { "epoch": 166.67, "learning_rate": 0.00017953846153846153, "loss": 0.0049, "step": 13000 }, { "epoch": 166.67, "eval_loss": 0.09844444692134857, "eval_runtime": 61.9554, "eval_samples_per_second": 29.973, "eval_steps_per_second": 1.259, "eval_wer": 0.07261852015950376, "step": 13000 }, { "epoch": 173.08, "learning_rate": 0.00016671794871794872, "loss": 0.0045, "step": 13500 }, { "epoch": 173.08, "eval_loss": 0.09517823159694672, "eval_runtime": 61.492, "eval_samples_per_second": 30.199, "eval_steps_per_second": 1.268, "eval_wer": 0.07651750110766505, "step": 13500 }, { "epoch": 179.49, "learning_rate": 0.0001538974358974359, "loss": 0.0039, "step": 14000 }, { "epoch": 179.49, "eval_loss": 0.10154110193252563, "eval_runtime": 61.18, "eval_samples_per_second": 30.353, "eval_steps_per_second": 1.275, "eval_wer": 0.07620735489587949, "step": 14000 }, { "epoch": 185.9, "learning_rate": 0.00014107692307692307, "loss": 0.0031, "step": 14500 }, { "epoch": 185.9, "eval_loss": 0.09374968707561493, "eval_runtime": 61.1162, "eval_samples_per_second": 30.385, "eval_steps_per_second": 1.276, "eval_wer": 0.07124501550731059, "step": 14500 }, { "epoch": 192.31, "learning_rate": 0.00012825641025641026, "loss": 0.0032, "step": 15000 }, { "epoch": 192.31, "eval_loss": 0.09821684658527374, "eval_runtime": 60.073, "eval_samples_per_second": 30.912, "eval_steps_per_second": 1.298, "eval_wer": 0.06353566681435534, "step": 15000 }, { "epoch": 198.72, "learning_rate": 0.00011546153846153847, "loss": 0.0028, "step": 15500 }, { "epoch": 198.72, "eval_loss": 0.098084457218647, "eval_runtime": 61.9773, "eval_samples_per_second": 29.963, "eval_steps_per_second": 1.259, "eval_wer": 0.07434647762516615, "step": 15500 }, { "epoch": 205.13, "learning_rate": 0.00010266666666666668, "loss": 0.0024, "step": 16000 }, { "epoch": 205.13, "eval_loss": 0.10191462188959122, "eval_runtime": 65.8946, "eval_samples_per_second": 28.181, "eval_steps_per_second": 1.184, "eval_wer": 0.07120070890562694, "step": 16000 }, { "epoch": 211.54, "learning_rate": 8.987179487179488e-05, "loss": 0.0024, "step": 16500 }, { "epoch": 211.54, "eval_loss": 0.09566177427768707, "eval_runtime": 64.1104, "eval_samples_per_second": 28.966, "eval_steps_per_second": 1.217, "eval_wer": 0.07323881258307488, "step": 16500 }, { "epoch": 217.95, "learning_rate": 7.705128205128205e-05, "loss": 0.002, "step": 17000 }, { "epoch": 217.95, "eval_loss": 0.09413377195596695, "eval_runtime": 61.0383, "eval_samples_per_second": 30.424, "eval_steps_per_second": 1.278, "eval_wer": 0.07315019937970757, "step": 17000 }, { "epoch": 224.36, "learning_rate": 6.423076923076924e-05, "loss": 0.0015, "step": 17500 }, { "epoch": 224.36, "eval_loss": 0.10090558975934982, "eval_runtime": 61.85, "eval_samples_per_second": 30.024, "eval_steps_per_second": 1.261, "eval_wer": 0.07173238812583074, "step": 17500 }, { "epoch": 230.77, "learning_rate": 5.1410256410256414e-05, "loss": 0.0017, "step": 18000 }, { "epoch": 230.77, "eval_loss": 0.09554142504930496, "eval_runtime": 60.313, "eval_samples_per_second": 30.789, "eval_steps_per_second": 1.293, "eval_wer": 0.07301727957465662, "step": 18000 }, { "epoch": 237.18, "learning_rate": 3.858974358974359e-05, "loss": 0.0013, "step": 18500 }, { "epoch": 237.18, "eval_loss": 0.09889042377471924, "eval_runtime": 67.9718, "eval_samples_per_second": 27.32, "eval_steps_per_second": 1.148, "eval_wer": 0.07315019937970757, "step": 18500 }, { "epoch": 243.59, "learning_rate": 2.576923076923077e-05, "loss": 0.0013, "step": 19000 }, { "epoch": 243.59, "eval_loss": 0.09670563787221909, "eval_runtime": 66.758, "eval_samples_per_second": 27.817, "eval_steps_per_second": 1.168, "eval_wer": 0.07377049180327869, "step": 19000 }, { "epoch": 250.0, "learning_rate": 1.2948717948717948e-05, "loss": 0.0011, "step": 19500 }, { "epoch": 250.0, "eval_loss": 0.0980001762509346, "eval_runtime": 66.2994, "eval_samples_per_second": 28.009, "eval_steps_per_second": 1.176, "eval_wer": 0.07341603898980949, "step": 19500 }, { "epoch": 256.41, "learning_rate": 1.282051282051282e-07, "loss": 0.0008, "step": 20000 }, { "epoch": 256.41, "eval_loss": 0.09882301837205887, "eval_runtime": 65.0891, "eval_samples_per_second": 28.53, "eval_steps_per_second": 1.198, "eval_wer": 0.07359326539654408, "step": 20000 }, { "epoch": 256.41, "step": 20000, "total_flos": 2.406420737737408e+20, "train_loss": 0.06207249406576157, "train_runtime": 86281.9422, "train_samples_per_second": 22.253, "train_steps_per_second": 0.232 } ], "max_steps": 20000, "num_train_epochs": 257, "total_flos": 2.406420737737408e+20, "trial_name": null, "trial_params": null }