{ "best_metric": null, "best_model_checkpoint": null, "epoch": 256.4102564102564, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.41, "learning_rate": 0.000499, "loss": 1.4757, "step": 500 }, { "epoch": 6.41, "eval_loss": 0.061434268951416016, "eval_runtime": 68.9385, "eval_samples_per_second": 26.937, "eval_steps_per_second": 1.131, "eval_wer": 0.03469206911829863, "step": 500 }, { "epoch": 12.82, "learning_rate": 0.0004872051282051282, "loss": 0.0624, "step": 1000 }, { "epoch": 12.82, "eval_loss": 0.05250529572367668, "eval_runtime": 68.4693, "eval_samples_per_second": 27.122, "eval_steps_per_second": 1.139, "eval_wer": 0.027735932653965442, "step": 1000 }, { "epoch": 19.23, "learning_rate": 0.0004743846153846154, "loss": 0.0388, "step": 1500 }, { "epoch": 19.23, "eval_loss": 0.06932416558265686, "eval_runtime": 67.8311, "eval_samples_per_second": 27.377, "eval_steps_per_second": 1.15, "eval_wer": 0.024147097917589722, "step": 1500 }, { "epoch": 25.64, "learning_rate": 0.0004615641025641026, "loss": 0.03, "step": 2000 }, { "epoch": 25.64, "eval_loss": 0.0665554478764534, "eval_runtime": 69.0525, "eval_samples_per_second": 26.893, "eval_steps_per_second": 1.13, "eval_wer": 0.024412937527691626, "step": 2000 }, { "epoch": 32.05, "learning_rate": 0.0004487435897435898, "loss": 0.0235, "step": 2500 }, { "epoch": 32.05, "eval_loss": 0.06039978191256523, "eval_runtime": 67.8702, "eval_samples_per_second": 27.361, "eval_steps_per_second": 1.149, "eval_wer": 0.026007975188303056, "step": 2500 }, { "epoch": 38.46, "learning_rate": 0.0004359230769230769, "loss": 0.0226, "step": 3000 }, { "epoch": 38.46, "eval_loss": 0.06249115616083145, "eval_runtime": 67.3401, "eval_samples_per_second": 27.576, "eval_steps_per_second": 1.158, "eval_wer": 0.022950819672131147, "step": 3000 }, { "epoch": 44.87, "learning_rate": 0.0004231025641025641, "loss": 0.0163, "step": 3500 }, { "epoch": 44.87, "eval_loss": 0.06026700139045715, "eval_runtime": 67.7207, "eval_samples_per_second": 27.421, "eval_steps_per_second": 1.152, "eval_wer": 0.01953921134249003, "step": 3500 }, { "epoch": 51.28, "learning_rate": 0.0004102820512820513, "loss": 0.0157, "step": 4000 }, { "epoch": 51.28, "eval_loss": 0.0627710148692131, "eval_runtime": 67.8528, "eval_samples_per_second": 27.368, "eval_steps_per_second": 1.15, "eval_wer": 0.02091271599468321, "step": 4000 }, { "epoch": 57.69, "learning_rate": 0.0003974615384615385, "loss": 0.0152, "step": 4500 }, { "epoch": 57.69, "eval_loss": 0.06921559572219849, "eval_runtime": 67.9506, "eval_samples_per_second": 27.329, "eval_steps_per_second": 1.148, "eval_wer": 0.023836951705804164, "step": 4500 }, { "epoch": 64.1, "learning_rate": 0.00038464102564102567, "loss": 0.0122, "step": 5000 }, { "epoch": 64.1, "eval_loss": 0.06069288030266762, "eval_runtime": 67.6564, "eval_samples_per_second": 27.448, "eval_steps_per_second": 1.153, "eval_wer": 0.02100132919805051, "step": 5000 }, { "epoch": 70.51, "learning_rate": 0.0003718205128205128, "loss": 0.011, "step": 5500 }, { "epoch": 70.51, "eval_loss": 0.06082445755600929, "eval_runtime": 68.2551, "eval_samples_per_second": 27.207, "eval_steps_per_second": 1.143, "eval_wer": 0.021311475409836064, "step": 5500 }, { "epoch": 76.92, "learning_rate": 0.000359, "loss": 0.0114, "step": 6000 }, { "epoch": 76.92, "eval_loss": 0.06809388101100922, "eval_runtime": 68.8171, "eval_samples_per_second": 26.985, "eval_steps_per_second": 1.133, "eval_wer": 0.021089942401417813, "step": 6000 }, { "epoch": 83.33, "learning_rate": 0.00034620512820512825, "loss": 0.0106, "step": 6500 }, { "epoch": 83.33, "eval_loss": 0.061287231743335724, "eval_runtime": 67.6622, "eval_samples_per_second": 27.445, "eval_steps_per_second": 1.153, "eval_wer": 0.02100132919805051, "step": 6500 }, { "epoch": 89.74, "learning_rate": 0.0003333846153846154, "loss": 0.0081, "step": 7000 }, { "epoch": 89.74, "eval_loss": 0.06544984877109528, "eval_runtime": 68.1647, "eval_samples_per_second": 27.243, "eval_steps_per_second": 1.144, "eval_wer": 0.019627824545857334, "step": 7000 }, { "epoch": 96.15, "learning_rate": 0.00032056410256410257, "loss": 0.0078, "step": 7500 }, { "epoch": 96.15, "eval_loss": 0.06121820956468582, "eval_runtime": 68.399, "eval_samples_per_second": 27.15, "eval_steps_per_second": 1.14, "eval_wer": 0.01905183872396987, "step": 7500 }, { "epoch": 102.56, "learning_rate": 0.00030774358974358976, "loss": 0.0082, "step": 8000 }, { "epoch": 102.56, "eval_loss": 0.07580074667930603, "eval_runtime": 67.4807, "eval_samples_per_second": 27.519, "eval_steps_per_second": 1.156, "eval_wer": 0.02365972529906956, "step": 8000 }, { "epoch": 108.97, "learning_rate": 0.0002949230769230769, "loss": 0.0078, "step": 8500 }, { "epoch": 108.97, "eval_loss": 0.06641749292612076, "eval_runtime": 67.4225, "eval_samples_per_second": 27.543, "eval_steps_per_second": 1.157, "eval_wer": 0.02060256978289765, "step": 8500 }, { "epoch": 115.38, "learning_rate": 0.00028210256410256414, "loss": 0.0075, "step": 9000 }, { "epoch": 115.38, "eval_loss": 0.06580852717161179, "eval_runtime": 67.9347, "eval_samples_per_second": 27.335, "eval_steps_per_second": 1.148, "eval_wer": 0.019716437749224634, "step": 9000 }, { "epoch": 121.79, "learning_rate": 0.0002692820512820513, "loss": 0.0052, "step": 9500 }, { "epoch": 121.79, "eval_loss": 0.06690431386232376, "eval_runtime": 66.8582, "eval_samples_per_second": 27.775, "eval_steps_per_second": 1.167, "eval_wer": 0.021843154630039874, "step": 9500 }, { "epoch": 128.21, "learning_rate": 0.00025646153846153847, "loss": 0.0054, "step": 10000 }, { "epoch": 128.21, "eval_loss": 0.06948971748352051, "eval_runtime": 67.1148, "eval_samples_per_second": 27.669, "eval_steps_per_second": 1.162, "eval_wer": 0.021089942401417813, "step": 10000 }, { "epoch": 134.62, "learning_rate": 0.00024364102564102563, "loss": 0.0053, "step": 10500 }, { "epoch": 134.62, "eval_loss": 0.07259159535169601, "eval_runtime": 66.7066, "eval_samples_per_second": 27.838, "eval_steps_per_second": 1.169, "eval_wer": 0.022684980062029243, "step": 10500 }, { "epoch": 141.03, "learning_rate": 0.00023082051282051282, "loss": 0.0046, "step": 11000 }, { "epoch": 141.03, "eval_loss": 0.07016939669847488, "eval_runtime": 68.2063, "eval_samples_per_second": 27.226, "eval_steps_per_second": 1.144, "eval_wer": 0.021178555604785113, "step": 11000 }, { "epoch": 147.44, "learning_rate": 0.000218, "loss": 0.0043, "step": 11500 }, { "epoch": 147.44, "eval_loss": 0.08461024612188339, "eval_runtime": 66.3999, "eval_samples_per_second": 27.967, "eval_steps_per_second": 1.175, "eval_wer": 0.020026583961010192, "step": 11500 }, { "epoch": 153.85, "learning_rate": 0.00020517948717948718, "loss": 0.0041, "step": 12000 }, { "epoch": 153.85, "eval_loss": 0.07643292099237442, "eval_runtime": 66.6168, "eval_samples_per_second": 27.876, "eval_steps_per_second": 1.171, "eval_wer": 0.020026583961010192, "step": 12000 }, { "epoch": 160.26, "learning_rate": 0.00019235897435897437, "loss": 0.0032, "step": 12500 }, { "epoch": 160.26, "eval_loss": 0.0785411074757576, "eval_runtime": 67.7953, "eval_samples_per_second": 27.391, "eval_steps_per_second": 1.151, "eval_wer": 0.02007089056269384, "step": 12500 }, { "epoch": 166.67, "learning_rate": 0.0001795641025641026, "loss": 0.0028, "step": 13000 }, { "epoch": 166.67, "eval_loss": 0.08392436057329178, "eval_runtime": 67.1052, "eval_samples_per_second": 27.673, "eval_steps_per_second": 1.162, "eval_wer": 0.019672131147540985, "step": 13000 }, { "epoch": 173.08, "learning_rate": 0.00016674358974358975, "loss": 0.0035, "step": 13500 }, { "epoch": 173.08, "eval_loss": 0.07846853882074356, "eval_runtime": 66.3462, "eval_samples_per_second": 27.99, "eval_steps_per_second": 1.176, "eval_wer": 0.02100132919805051, "step": 13500 }, { "epoch": 179.49, "learning_rate": 0.00015394871794871794, "loss": 0.0027, "step": 14000 }, { "epoch": 179.49, "eval_loss": 0.07303400337696075, "eval_runtime": 66.6716, "eval_samples_per_second": 27.853, "eval_steps_per_second": 1.17, "eval_wer": 0.018785999113867965, "step": 14000 }, { "epoch": 185.9, "learning_rate": 0.00014112820512820513, "loss": 0.002, "step": 14500 }, { "epoch": 185.9, "eval_loss": 0.07940459251403809, "eval_runtime": 66.4899, "eval_samples_per_second": 27.929, "eval_steps_per_second": 1.173, "eval_wer": 0.019317678334071775, "step": 14500 }, { "epoch": 192.31, "learning_rate": 0.00012830769230769232, "loss": 0.002, "step": 15000 }, { "epoch": 192.31, "eval_loss": 0.08587377518415451, "eval_runtime": 67.8686, "eval_samples_per_second": 27.362, "eval_steps_per_second": 1.149, "eval_wer": 0.02113424900310146, "step": 15000 }, { "epoch": 198.72, "learning_rate": 0.00011548717948717949, "loss": 0.0019, "step": 15500 }, { "epoch": 198.72, "eval_loss": 0.07269652187824249, "eval_runtime": 66.1737, "eval_samples_per_second": 28.063, "eval_steps_per_second": 1.179, "eval_wer": 0.018342933097031458, "step": 15500 }, { "epoch": 205.13, "learning_rate": 0.00010266666666666668, "loss": 0.0017, "step": 16000 }, { "epoch": 205.13, "eval_loss": 0.07843895256519318, "eval_runtime": 67.2939, "eval_samples_per_second": 27.595, "eval_steps_per_second": 1.159, "eval_wer": 0.018653079308817013, "step": 16000 }, { "epoch": 211.54, "learning_rate": 8.984615384615384e-05, "loss": 0.0016, "step": 16500 }, { "epoch": 211.54, "eval_loss": 0.08008446544408798, "eval_runtime": 67.2442, "eval_samples_per_second": 27.616, "eval_steps_per_second": 1.16, "eval_wer": 0.019627824545857334, "step": 16500 }, { "epoch": 217.95, "learning_rate": 7.702564102564103e-05, "loss": 0.0014, "step": 17000 }, { "epoch": 217.95, "eval_loss": 0.0820729061961174, "eval_runtime": 67.2612, "eval_samples_per_second": 27.609, "eval_steps_per_second": 1.16, "eval_wer": 0.01847585290208241, "step": 17000 }, { "epoch": 224.36, "learning_rate": 6.420512820512821e-05, "loss": 0.0011, "step": 17500 }, { "epoch": 224.36, "eval_loss": 0.08215450495481491, "eval_runtime": 67.1514, "eval_samples_per_second": 27.654, "eval_steps_per_second": 1.162, "eval_wer": 0.017634027470093044, "step": 17500 }, { "epoch": 230.77, "learning_rate": 5.1384615384615385e-05, "loss": 0.001, "step": 18000 }, { "epoch": 230.77, "eval_loss": 0.0855555310845375, "eval_runtime": 67.2619, "eval_samples_per_second": 27.609, "eval_steps_per_second": 1.16, "eval_wer": 0.017058041648205582, "step": 18000 }, { "epoch": 237.18, "learning_rate": 3.856410256410257e-05, "loss": 0.001, "step": 18500 }, { "epoch": 237.18, "eval_loss": 0.07919255644083023, "eval_runtime": 66.733, "eval_samples_per_second": 27.827, "eval_steps_per_second": 1.169, "eval_wer": 0.017589720868409393, "step": 18500 }, { "epoch": 243.59, "learning_rate": 2.574358974358974e-05, "loss": 0.001, "step": 19000 }, { "epoch": 243.59, "eval_loss": 0.08259344846010208, "eval_runtime": 67.3537, "eval_samples_per_second": 27.571, "eval_steps_per_second": 1.158, "eval_wer": 0.017279574656623838, "step": 19000 }, { "epoch": 250.0, "learning_rate": 1.2923076923076924e-05, "loss": 0.0006, "step": 19500 }, { "epoch": 250.0, "eval_loss": 0.08542538434267044, "eval_runtime": 66.6818, "eval_samples_per_second": 27.849, "eval_steps_per_second": 1.17, "eval_wer": 0.016969428444838282, "step": 19500 }, { "epoch": 256.41, "learning_rate": 1.282051282051282e-07, "loss": 0.0007, "step": 20000 }, { "epoch": 256.41, "eval_loss": 0.08499366790056229, "eval_runtime": 66.4242, "eval_samples_per_second": 27.957, "eval_steps_per_second": 1.174, "eval_wer": 0.016747895436420027, "step": 20000 }, { "epoch": 256.41, "step": 20000, "total_flos": 2.406420737737408e+20, "train_loss": 0.04605116495639086, "train_runtime": 87629.7749, "train_samples_per_second": 21.91, "train_steps_per_second": 0.228 } ], "max_steps": 20000, "num_train_epochs": 257, "total_flos": 2.406420737737408e+20, "trial_name": null, "trial_params": null }