{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 7900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.27, "learning_rate": 1.3719999999999999e-05, "loss": 10.7972, "step": 100 }, { "epoch": 2.53, "learning_rate": 2.772e-05, "loss": 4.2661, "step": 200 }, { "epoch": 3.8, "learning_rate": 4.1719999999999994e-05, "loss": 3.3145, "step": 300 }, { "epoch": 5.06, "learning_rate": 5.5719999999999995e-05, "loss": 3.039, "step": 400 }, { "epoch": 6.33, "learning_rate": 6.971999999999999e-05, "loss": 2.9774, "step": 500 }, { "epoch": 6.33, "eval_loss": 2.9768528938293457, "eval_runtime": 42.649, "eval_samples_per_second": 22.345, "eval_steps_per_second": 0.703, "eval_wer": 1.0, "step": 500 }, { "epoch": 7.59, "learning_rate": 6.907297297297297e-05, "loss": 2.9216, "step": 600 }, { "epoch": 8.86, "learning_rate": 6.812702702702703e-05, "loss": 2.453, "step": 700 }, { "epoch": 10.13, "learning_rate": 6.718108108108108e-05, "loss": 1.7073, "step": 800 }, { "epoch": 11.39, "learning_rate": 6.623513513513513e-05, "loss": 1.4457, "step": 900 }, { "epoch": 12.66, "learning_rate": 6.528918918918918e-05, "loss": 1.3453, "step": 1000 }, { "epoch": 12.66, "eval_loss": 0.6522914171218872, "eval_runtime": 42.9729, "eval_samples_per_second": 22.177, "eval_steps_per_second": 0.698, "eval_wer": 0.6979675258317247, "step": 1000 }, { "epoch": 13.92, "learning_rate": 6.434324324324325e-05, "loss": 1.2867, "step": 1100 }, { "epoch": 15.19, "learning_rate": 6.339729729729728e-05, "loss": 1.2507, "step": 1200 }, { "epoch": 16.46, "learning_rate": 6.245135135135135e-05, "loss": 1.21, "step": 1300 }, { "epoch": 17.72, "learning_rate": 6.15054054054054e-05, "loss": 1.1863, "step": 1400 }, { "epoch": 18.99, "learning_rate": 6.055945945945946e-05, "loss": 1.1658, "step": 1500 }, { "epoch": 18.99, "eval_loss": 0.5635793805122375, "eval_runtime": 43.1337, "eval_samples_per_second": 22.094, "eval_steps_per_second": 0.696, "eval_wer": 0.6358578403542636, "step": 1500 }, { "epoch": 20.25, "learning_rate": 5.9613513513513504e-05, "loss": 1.1476, "step": 1600 }, { "epoch": 21.52, "learning_rate": 5.866756756756756e-05, "loss": 1.1346, "step": 1700 }, { "epoch": 22.78, "learning_rate": 5.772162162162162e-05, "loss": 1.1074, "step": 1800 }, { "epoch": 24.05, "learning_rate": 5.677567567567567e-05, "loss": 1.0892, "step": 1900 }, { "epoch": 25.32, "learning_rate": 5.5829729729729724e-05, "loss": 1.0797, "step": 2000 }, { "epoch": 25.32, "eval_loss": 0.5003558993339539, "eval_runtime": 43.4309, "eval_samples_per_second": 21.943, "eval_steps_per_second": 0.691, "eval_wer": 0.5759055296922903, "step": 2000 }, { "epoch": 26.58, "learning_rate": 5.489324324324324e-05, "loss": 1.0695, "step": 2100 }, { "epoch": 27.85, "learning_rate": 5.394729729729729e-05, "loss": 1.065, "step": 2200 }, { "epoch": 29.11, "learning_rate": 5.300135135135134e-05, "loss": 1.0534, "step": 2300 }, { "epoch": 30.38, "learning_rate": 5.20554054054054e-05, "loss": 1.0323, "step": 2400 }, { "epoch": 31.65, "learning_rate": 5.110945945945946e-05, "loss": 1.044, "step": 2500 }, { "epoch": 31.65, "eval_loss": 0.49577832221984863, "eval_runtime": 42.5512, "eval_samples_per_second": 22.397, "eval_steps_per_second": 0.705, "eval_wer": 0.5569433405245827, "step": 2500 }, { "epoch": 32.91, "learning_rate": 5.0163513513513505e-05, "loss": 1.0187, "step": 2600 }, { "epoch": 34.18, "learning_rate": 4.921756756756756e-05, "loss": 1.0094, "step": 2700 }, { "epoch": 35.44, "learning_rate": 4.8271621621621615e-05, "loss": 1.0002, "step": 2800 }, { "epoch": 36.71, "learning_rate": 4.732567567567567e-05, "loss": 0.9978, "step": 2900 }, { "epoch": 37.97, "learning_rate": 4.6379729729729725e-05, "loss": 0.9915, "step": 3000 }, { "epoch": 37.97, "eval_loss": 0.49712273478507996, "eval_runtime": 41.1197, "eval_samples_per_second": 23.176, "eval_steps_per_second": 0.73, "eval_wer": 0.5350289542409447, "step": 3000 }, { "epoch": 39.24, "learning_rate": 4.5433783783783776e-05, "loss": 0.9786, "step": 3100 }, { "epoch": 40.51, "learning_rate": 4.4487837837837834e-05, "loss": 0.9819, "step": 3200 }, { "epoch": 41.77, "learning_rate": 4.354189189189189e-05, "loss": 0.9735, "step": 3300 }, { "epoch": 43.04, "learning_rate": 4.259594594594594e-05, "loss": 0.9624, "step": 3400 }, { "epoch": 44.3, "learning_rate": 4.1649999999999996e-05, "loss": 0.9429, "step": 3500 }, { "epoch": 44.3, "eval_loss": 0.48288407921791077, "eval_runtime": 41.6865, "eval_samples_per_second": 22.861, "eval_steps_per_second": 0.72, "eval_wer": 0.5228795276484615, "step": 3500 }, { "epoch": 45.57, "learning_rate": 4.070405405405405e-05, "loss": 0.9448, "step": 3600 }, { "epoch": 46.84, "learning_rate": 3.9758108108108106e-05, "loss": 0.9383, "step": 3700 }, { "epoch": 48.1, "learning_rate": 3.8812162162162164e-05, "loss": 0.939, "step": 3800 }, { "epoch": 49.37, "learning_rate": 3.786621621621621e-05, "loss": 0.9322, "step": 3900 }, { "epoch": 50.63, "learning_rate": 3.692027027027027e-05, "loss": 0.9266, "step": 4000 }, { "epoch": 50.63, "eval_loss": 0.4515039622783661, "eval_runtime": 41.6834, "eval_samples_per_second": 22.863, "eval_steps_per_second": 0.72, "eval_wer": 0.5074372658112865, "step": 4000 }, { "epoch": 51.9, "learning_rate": 3.5974324324324326e-05, "loss": 0.9147, "step": 4100 }, { "epoch": 53.16, "learning_rate": 3.502837837837838e-05, "loss": 0.9129, "step": 4200 }, { "epoch": 54.43, "learning_rate": 3.4091891891891893e-05, "loss": 0.9071, "step": 4300 }, { "epoch": 55.7, "learning_rate": 3.3145945945945945e-05, "loss": 0.9098, "step": 4400 }, { "epoch": 56.96, "learning_rate": 3.220945945945946e-05, "loss": 0.8965, "step": 4500 }, { "epoch": 56.96, "eval_loss": 0.45992422103881836, "eval_runtime": 41.9829, "eval_samples_per_second": 22.7, "eval_steps_per_second": 0.715, "eval_wer": 0.5039173384807539, "step": 4500 }, { "epoch": 58.23, "learning_rate": 3.126351351351351e-05, "loss": 0.8992, "step": 4600 }, { "epoch": 59.49, "learning_rate": 3.0327027027027026e-05, "loss": 0.9006, "step": 4700 }, { "epoch": 60.76, "learning_rate": 2.9381081081081077e-05, "loss": 0.8931, "step": 4800 }, { "epoch": 62.03, "learning_rate": 2.8435135135135132e-05, "loss": 0.8846, "step": 4900 }, { "epoch": 63.29, "learning_rate": 2.7489189189189187e-05, "loss": 0.878, "step": 5000 }, { "epoch": 63.29, "eval_loss": 0.4735390543937683, "eval_runtime": 41.3368, "eval_samples_per_second": 23.054, "eval_steps_per_second": 0.726, "eval_wer": 0.4954013852617236, "step": 5000 }, { "epoch": 64.56, "learning_rate": 2.6543243243243242e-05, "loss": 0.8696, "step": 5100 }, { "epoch": 65.82, "learning_rate": 2.5597297297297294e-05, "loss": 0.8756, "step": 5200 }, { "epoch": 67.09, "learning_rate": 2.465135135135135e-05, "loss": 0.8588, "step": 5300 }, { "epoch": 68.35, "learning_rate": 2.3705405405405404e-05, "loss": 0.8616, "step": 5400 }, { "epoch": 69.62, "learning_rate": 2.275945945945946e-05, "loss": 0.8494, "step": 5500 }, { "epoch": 69.62, "eval_loss": 0.44596442580223083, "eval_runtime": 41.8864, "eval_samples_per_second": 22.752, "eval_steps_per_second": 0.716, "eval_wer": 0.4877938003860565, "step": 5500 }, { "epoch": 70.89, "learning_rate": 2.181351351351351e-05, "loss": 0.8535, "step": 5600 }, { "epoch": 72.15, "learning_rate": 2.0867567567567565e-05, "loss": 0.8511, "step": 5700 }, { "epoch": 73.42, "learning_rate": 1.992162162162162e-05, "loss": 0.8436, "step": 5800 }, { "epoch": 74.68, "learning_rate": 1.8975675675675675e-05, "loss": 0.8527, "step": 5900 }, { "epoch": 75.95, "learning_rate": 1.8029729729729727e-05, "loss": 0.8343, "step": 6000 }, { "epoch": 75.95, "eval_loss": 0.4510088264942169, "eval_runtime": 41.5079, "eval_samples_per_second": 22.959, "eval_steps_per_second": 0.723, "eval_wer": 0.479504939252867, "step": 6000 }, { "epoch": 77.22, "learning_rate": 1.708378378378378e-05, "loss": 0.8474, "step": 6100 }, { "epoch": 78.48, "learning_rate": 1.6137837837837837e-05, "loss": 0.83, "step": 6200 }, { "epoch": 79.75, "learning_rate": 1.5191891891891891e-05, "loss": 0.8168, "step": 6300 }, { "epoch": 81.01, "learning_rate": 1.4245945945945945e-05, "loss": 0.8363, "step": 6400 }, { "epoch": 82.28, "learning_rate": 1.33e-05, "loss": 0.8236, "step": 6500 }, { "epoch": 82.28, "eval_loss": 0.4537811577320099, "eval_runtime": 40.81, "eval_samples_per_second": 23.352, "eval_steps_per_second": 0.735, "eval_wer": 0.47893720903826503, "step": 6500 }, { "epoch": 83.54, "learning_rate": 1.2354054054054053e-05, "loss": 0.8189, "step": 6600 }, { "epoch": 84.81, "learning_rate": 1.1408108108108108e-05, "loss": 0.8121, "step": 6700 }, { "epoch": 86.08, "learning_rate": 1.0462162162162161e-05, "loss": 0.8098, "step": 6800 }, { "epoch": 87.34, "learning_rate": 9.516216216216216e-06, "loss": 0.8091, "step": 6900 }, { "epoch": 88.61, "learning_rate": 8.57027027027027e-06, "loss": 0.8069, "step": 7000 }, { "epoch": 88.61, "eval_loss": 0.45264118909835815, "eval_runtime": 41.4122, "eval_samples_per_second": 23.013, "eval_steps_per_second": 0.724, "eval_wer": 0.47484955149313046, "step": 7000 }, { "epoch": 89.87, "learning_rate": 7.6243243243243236e-06, "loss": 0.8042, "step": 7100 }, { "epoch": 91.14, "learning_rate": 6.678378378378378e-06, "loss": 0.8042, "step": 7200 }, { "epoch": 92.41, "learning_rate": 5.732432432432432e-06, "loss": 0.7968, "step": 7300 }, { "epoch": 93.67, "learning_rate": 4.786486486486486e-06, "loss": 0.798, "step": 7400 }, { "epoch": 94.94, "learning_rate": 3.84054054054054e-06, "loss": 0.7958, "step": 7500 }, { "epoch": 94.94, "eval_loss": 0.449596643447876, "eval_runtime": 41.0608, "eval_samples_per_second": 23.209, "eval_steps_per_second": 0.731, "eval_wer": 0.4699670716475531, "step": 7500 }, { "epoch": 96.2, "learning_rate": 2.8945945945945945e-06, "loss": 0.7953, "step": 7600 }, { "epoch": 97.47, "learning_rate": 1.9486486486486487e-06, "loss": 0.7917, "step": 7700 }, { "epoch": 98.73, "learning_rate": 1.0027027027027026e-06, "loss": 0.7881, "step": 7800 }, { "epoch": 100.0, "learning_rate": 5.675675675675675e-08, "loss": 0.795, "step": 7900 }, { "epoch": 100.0, "step": 7900, "total_flos": 3.986125981994791e+19, "train_loss": 1.2487838832034341, "train_runtime": 15247.9542, "train_samples_per_second": 16.383, "train_steps_per_second": 0.518 } ], "max_steps": 7900, "num_train_epochs": 100, "total_flos": 3.986125981994791e+19, "trial_name": null, "trial_params": null }