{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.997830802603037, "global_step": 6900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.43, "eval_loss": 4.412876605987549, "eval_runtime": 167.4094, "eval_samples_per_second": 29.264, "eval_steps_per_second": 0.92, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.87, "eval_loss": 3.592686653137207, "eval_runtime": 162.2391, "eval_samples_per_second": 30.196, "eval_steps_per_second": 0.949, "eval_wer": 1.0, "step": 200 }, { "epoch": 1.3, "eval_loss": 3.3779923915863037, "eval_runtime": 162.9178, "eval_samples_per_second": 30.07, "eval_steps_per_second": 0.945, "eval_wer": 1.0, "step": 300 }, { "epoch": 1.74, "eval_loss": 3.0830302238464355, "eval_runtime": 165.7412, "eval_samples_per_second": 29.558, "eval_steps_per_second": 0.929, "eval_wer": 1.0, "step": 400 }, { "epoch": 2.17, "learning_rate": 0.00029759999999999997, "loss": 5.3551, "step": 500 }, { "epoch": 2.17, "eval_loss": 2.6277503967285156, "eval_runtime": 162.7347, "eval_samples_per_second": 30.104, "eval_steps_per_second": 0.946, "eval_wer": 0.999864842947505, "step": 500 }, { "epoch": 2.61, "eval_loss": 1.8358852863311768, "eval_runtime": 165.3543, "eval_samples_per_second": 29.627, "eval_steps_per_second": 0.931, "eval_wer": 0.999972968589501, "step": 600 }, { "epoch": 3.04, "eval_loss": 1.7877882719039917, "eval_runtime": 161.7494, "eval_samples_per_second": 30.288, "eval_steps_per_second": 0.952, "eval_wer": 0.9914040114613181, "step": 700 }, { "epoch": 3.48, "eval_loss": 1.5219440460205078, "eval_runtime": 163.3701, "eval_samples_per_second": 29.987, "eval_steps_per_second": 0.943, "eval_wer": 0.9875114883494621, "step": 800 }, { "epoch": 3.91, "eval_loss": 1.4348260164260864, "eval_runtime": 163.9354, "eval_samples_per_second": 29.884, "eval_steps_per_second": 0.939, "eval_wer": 0.9879439909174461, "step": 900 }, { "epoch": 4.35, "learning_rate": 0.00027675, "loss": 1.7199, "step": 1000 }, { "epoch": 4.35, "eval_loss": 1.4353642463684082, "eval_runtime": 167.0923, "eval_samples_per_second": 29.319, "eval_steps_per_second": 0.922, "eval_wer": 0.9644266637833162, "step": 1000 }, { "epoch": 4.78, "eval_loss": 1.5210459232330322, "eval_runtime": 174.6297, "eval_samples_per_second": 28.054, "eval_steps_per_second": 0.882, "eval_wer": 0.9518840893117803, "step": 1100 }, { "epoch": 5.22, "eval_loss": 1.3606946468353271, "eval_runtime": 164.1085, "eval_samples_per_second": 29.852, "eval_steps_per_second": 0.938, "eval_wer": 0.9474779694004433, "step": 1200 }, { "epoch": 5.65, "eval_loss": 1.383901834487915, "eval_runtime": 163.0064, "eval_samples_per_second": 30.054, "eval_steps_per_second": 0.945, "eval_wer": 0.9343136724874304, "step": 1300 }, { "epoch": 6.09, "eval_loss": 1.2805912494659424, "eval_runtime": 161.8354, "eval_samples_per_second": 30.271, "eval_steps_per_second": 0.952, "eval_wer": 0.8944423420014056, "step": 1400 }, { "epoch": 6.52, "learning_rate": 0.00025331249999999996, "loss": 1.2342, "step": 1500 }, { "epoch": 6.52, "eval_loss": 1.3035786151885986, "eval_runtime": 161.6796, "eval_samples_per_second": 30.301, "eval_steps_per_second": 0.953, "eval_wer": 0.9011191003946586, "step": 1500 }, { "epoch": 6.95, "eval_loss": 1.3704484701156616, "eval_runtime": 162.7459, "eval_samples_per_second": 30.102, "eval_steps_per_second": 0.946, "eval_wer": 0.9072011677569336, "step": 1600 }, { "epoch": 7.39, "eval_loss": 1.298081636428833, "eval_runtime": 160.2526, "eval_samples_per_second": 30.57, "eval_steps_per_second": 0.961, "eval_wer": 0.8890630913121047, "step": 1700 }, { "epoch": 7.82, "eval_loss": 1.278607964515686, "eval_runtime": 160.6135, "eval_samples_per_second": 30.502, "eval_steps_per_second": 0.959, "eval_wer": 0.8733308104016868, "step": 1800 }, { "epoch": 8.26, "eval_loss": 1.2897309064865112, "eval_runtime": 160.1151, "eval_samples_per_second": 30.597, "eval_steps_per_second": 0.962, "eval_wer": 0.8866843271881927, "step": 1900 }, { "epoch": 8.69, "learning_rate": 0.00022987499999999996, "loss": 0.9831, "step": 2000 }, { "epoch": 8.69, "eval_loss": 1.4436370134353638, "eval_runtime": 161.3545, "eval_samples_per_second": 30.362, "eval_steps_per_second": 0.954, "eval_wer": 0.8779531815970157, "step": 2000 }, { "epoch": 9.13, "eval_loss": 1.3680068254470825, "eval_runtime": 162.428, "eval_samples_per_second": 30.161, "eval_steps_per_second": 0.948, "eval_wer": 0.8872519868086717, "step": 2100 }, { "epoch": 9.56, "eval_loss": 1.347055435180664, "eval_runtime": 168.1105, "eval_samples_per_second": 29.142, "eval_steps_per_second": 0.916, "eval_wer": 0.8692490674163378, "step": 2200 }, { "epoch": 10.0, "eval_loss": 1.3724839687347412, "eval_runtime": 161.7991, "eval_samples_per_second": 30.278, "eval_steps_per_second": 0.952, "eval_wer": 0.8729253392442018, "step": 2300 }, { "epoch": 10.43, "eval_loss": 1.4439136981964111, "eval_runtime": 164.3202, "eval_samples_per_second": 29.814, "eval_steps_per_second": 0.937, "eval_wer": 0.8770881764610478, "step": 2400 }, { "epoch": 10.87, "learning_rate": 0.00020643749999999997, "loss": 0.8071, "step": 2500 }, { "epoch": 10.87, "eval_loss": 1.5114314556121826, "eval_runtime": 160.915, "eval_samples_per_second": 30.445, "eval_steps_per_second": 0.957, "eval_wer": 0.8928474887819646, "step": 2500 }, { "epoch": 11.3, "eval_loss": 1.6155508756637573, "eval_runtime": 162.3974, "eval_samples_per_second": 30.167, "eval_steps_per_second": 0.948, "eval_wer": 0.8958479753473536, "step": 2600 }, { "epoch": 11.74, "eval_loss": 1.4381186962127686, "eval_runtime": 160.766, "eval_samples_per_second": 30.473, "eval_steps_per_second": 0.958, "eval_wer": 0.8748716008001297, "step": 2700 }, { "epoch": 12.17, "eval_loss": 1.508760929107666, "eval_runtime": 161.5608, "eval_samples_per_second": 30.323, "eval_steps_per_second": 0.953, "eval_wer": 0.8716818943612478, "step": 2800 }, { "epoch": 12.61, "eval_loss": 1.5485790967941284, "eval_runtime": 163.3707, "eval_samples_per_second": 29.987, "eval_steps_per_second": 0.943, "eval_wer": 0.8812510136778937, "step": 2900 }, { "epoch": 13.04, "learning_rate": 0.00018299999999999998, "loss": 0.6321, "step": 3000 }, { "epoch": 13.04, "eval_loss": 1.4535671472549438, "eval_runtime": 162.8299, "eval_samples_per_second": 30.087, "eval_steps_per_second": 0.946, "eval_wer": 0.8884143374601287, "step": 3000 }, { "epoch": 13.48, "eval_loss": 1.4679176807403564, "eval_runtime": 163.238, "eval_samples_per_second": 30.011, "eval_steps_per_second": 0.943, "eval_wer": 0.8947126561063956, "step": 3100 }, { "epoch": 13.91, "eval_loss": 1.5627696514129639, "eval_runtime": 163.36, "eval_samples_per_second": 29.989, "eval_steps_per_second": 0.943, "eval_wer": 0.9117424447207655, "step": 3200 }, { "epoch": 14.35, "eval_loss": 1.58307683467865, "eval_runtime": 166.5717, "eval_samples_per_second": 29.411, "eval_steps_per_second": 0.925, "eval_wer": 0.8716008001297507, "step": 3300 }, { "epoch": 14.78, "eval_loss": 1.6733046770095825, "eval_runtime": 162.8481, "eval_samples_per_second": 30.083, "eval_steps_per_second": 0.946, "eval_wer": 0.8701951667838028, "step": 3400 }, { "epoch": 15.22, "learning_rate": 0.00015956249999999998, "loss": 0.4998, "step": 3500 }, { "epoch": 15.22, "eval_loss": 1.82253897190094, "eval_runtime": 166.0482, "eval_samples_per_second": 29.503, "eval_steps_per_second": 0.927, "eval_wer": 0.8664648321349409, "step": 3500 }, { "epoch": 15.65, "eval_loss": 1.8557839393615723, "eval_runtime": 164.7566, "eval_samples_per_second": 29.735, "eval_steps_per_second": 0.935, "eval_wer": 0.8732226847596908, "step": 3600 }, { "epoch": 16.09, "eval_loss": 1.7512831687927246, "eval_runtime": 164.2229, "eval_samples_per_second": 29.831, "eval_steps_per_second": 0.938, "eval_wer": 0.8765745796615667, "step": 3700 }, { "epoch": 16.52, "eval_loss": 1.8561654090881348, "eval_runtime": 163.759, "eval_samples_per_second": 29.916, "eval_steps_per_second": 0.94, "eval_wer": 0.8753041033681137, "step": 3800 }, { "epoch": 16.95, "eval_loss": 1.9017548561096191, "eval_runtime": 163.9631, "eval_samples_per_second": 29.879, "eval_steps_per_second": 0.939, "eval_wer": 0.8703573552467968, "step": 3900 }, { "epoch": 17.39, "learning_rate": 0.000136125, "loss": 0.4421, "step": 4000 }, { "epoch": 17.39, "eval_loss": 1.9341310262680054, "eval_runtime": 162.0015, "eval_samples_per_second": 30.24, "eval_steps_per_second": 0.951, "eval_wer": 0.8788992809644808, "step": 4000 }, { "epoch": 17.82, "eval_loss": 1.9581764936447144, "eval_runtime": 163.9276, "eval_samples_per_second": 29.885, "eval_steps_per_second": 0.939, "eval_wer": 0.8781424014705087, "step": 4100 }, { "epoch": 18.26, "eval_loss": 1.8863332271575928, "eval_runtime": 162.6969, "eval_samples_per_second": 30.111, "eval_steps_per_second": 0.947, "eval_wer": 0.8820889874033627, "step": 4200 }, { "epoch": 18.69, "eval_loss": 1.9366161823272705, "eval_runtime": 163.4934, "eval_samples_per_second": 29.965, "eval_steps_per_second": 0.942, "eval_wer": 0.8846569714007677, "step": 4300 }, { "epoch": 19.13, "eval_loss": 2.1901650428771973, "eval_runtime": 166.8301, "eval_samples_per_second": 29.365, "eval_steps_per_second": 0.923, "eval_wer": 0.8721414283397307, "step": 4400 }, { "epoch": 19.56, "learning_rate": 0.00011268749999999998, "loss": 0.3712, "step": 4500 }, { "epoch": 19.56, "eval_loss": 2.164060592651367, "eval_runtime": 162.9234, "eval_samples_per_second": 30.069, "eval_steps_per_second": 0.945, "eval_wer": 0.8670054603449208, "step": 4500 }, { "epoch": 20.0, "eval_loss": 2.163888931274414, "eval_runtime": 164.0497, "eval_samples_per_second": 29.863, "eval_steps_per_second": 0.939, "eval_wer": 0.8776017732605287, "step": 4600 }, { "epoch": 20.43, "eval_loss": 2.269531726837158, "eval_runtime": 161.5615, "eval_samples_per_second": 30.323, "eval_steps_per_second": 0.953, "eval_wer": 0.9029842677190896, "step": 4700 }, { "epoch": 20.87, "eval_loss": 2.1909375190734863, "eval_runtime": 159.4363, "eval_samples_per_second": 30.727, "eval_steps_per_second": 0.966, "eval_wer": 0.8936584310969347, "step": 4800 }, { "epoch": 21.3, "eval_loss": 2.160627603530884, "eval_runtime": 160.8614, "eval_samples_per_second": 30.455, "eval_steps_per_second": 0.957, "eval_wer": 0.8959290695788507, "step": 4900 }, { "epoch": 21.74, "learning_rate": 8.924999999999999e-05, "loss": 0.3067, "step": 5000 }, { "epoch": 21.74, "eval_loss": 2.1755802631378174, "eval_runtime": 160.8247, "eval_samples_per_second": 30.462, "eval_steps_per_second": 0.958, "eval_wer": 0.8943342163594097, "step": 5000 }, { "epoch": 22.17, "eval_loss": 2.409205913543701, "eval_runtime": 160.8564, "eval_samples_per_second": 30.456, "eval_steps_per_second": 0.957, "eval_wer": 0.8772503649240417, "step": 5100 }, { "epoch": 22.61, "eval_loss": 2.499131202697754, "eval_runtime": 163.2327, "eval_samples_per_second": 30.012, "eval_steps_per_second": 0.943, "eval_wer": 0.8721143969292318, "step": 5200 }, { "epoch": 23.04, "eval_loss": 2.3339521884918213, "eval_runtime": 160.7999, "eval_samples_per_second": 30.466, "eval_steps_per_second": 0.958, "eval_wer": 0.8910363842785316, "step": 5300 }, { "epoch": 23.48, "eval_loss": 2.3566715717315674, "eval_runtime": 161.7409, "eval_samples_per_second": 30.289, "eval_steps_per_second": 0.952, "eval_wer": 0.8946045304643996, "step": 5400 }, { "epoch": 23.91, "learning_rate": 6.58125e-05, "loss": 0.2764, "step": 5500 }, { "epoch": 23.91, "eval_loss": 2.3214945793151855, "eval_runtime": 165.7293, "eval_samples_per_second": 29.56, "eval_steps_per_second": 0.929, "eval_wer": 0.8897118451640806, "step": 5500 }, { "epoch": 24.35, "eval_loss": 2.482433795928955, "eval_runtime": 158.7566, "eval_samples_per_second": 30.859, "eval_steps_per_second": 0.97, "eval_wer": 0.9002270638481916, "step": 5600 }, { "epoch": 24.78, "eval_loss": 2.4584639072418213, "eval_runtime": 160.826, "eval_samples_per_second": 30.461, "eval_steps_per_second": 0.958, "eval_wer": 0.8962534465048386, "step": 5700 }, { "epoch": 25.22, "eval_loss": 2.580402135848999, "eval_runtime": 161.3761, "eval_samples_per_second": 30.358, "eval_steps_per_second": 0.954, "eval_wer": 0.8878737092501486, "step": 5800 }, { "epoch": 25.65, "eval_loss": 2.5814249515533447, "eval_runtime": 161.7763, "eval_samples_per_second": 30.283, "eval_steps_per_second": 0.952, "eval_wer": 0.8902795047845596, "step": 5900 }, { "epoch": 26.09, "learning_rate": 4.237499999999999e-05, "loss": 0.2593, "step": 6000 }, { "epoch": 26.09, "eval_loss": 2.5374372005462646, "eval_runtime": 161.3184, "eval_samples_per_second": 30.369, "eval_steps_per_second": 0.955, "eval_wer": 0.8867654214196897, "step": 6000 }, { "epoch": 26.52, "eval_loss": 2.5346157550811768, "eval_runtime": 161.3913, "eval_samples_per_second": 30.355, "eval_steps_per_second": 0.954, "eval_wer": 0.8922257663404877, "step": 6100 }, { "epoch": 26.95, "eval_loss": 2.546508312225342, "eval_runtime": 161.2827, "eval_samples_per_second": 30.375, "eval_steps_per_second": 0.955, "eval_wer": 0.8873060496296696, "step": 6200 }, { "epoch": 27.39, "eval_loss": 2.6001508235931396, "eval_runtime": 161.5241, "eval_samples_per_second": 30.33, "eval_steps_per_second": 0.953, "eval_wer": 0.8919013894144997, "step": 6300 }, { "epoch": 27.82, "eval_loss": 2.610161304473877, "eval_runtime": 159.1491, "eval_samples_per_second": 30.782, "eval_steps_per_second": 0.968, "eval_wer": 0.8927663945504677, "step": 6400 }, { "epoch": 28.26, "learning_rate": 1.89375e-05, "loss": 0.227, "step": 6500 }, { "epoch": 28.26, "eval_loss": 2.692467451095581, "eval_runtime": 161.7243, "eval_samples_per_second": 30.292, "eval_steps_per_second": 0.952, "eval_wer": 0.8914418554360166, "step": 6500 }, { "epoch": 28.69, "eval_loss": 2.6981399059295654, "eval_runtime": 164.1482, "eval_samples_per_second": 29.845, "eval_steps_per_second": 0.938, "eval_wer": 0.8913337297940207, "step": 6600 }, { "epoch": 29.13, "eval_loss": 2.687201499938965, "eval_runtime": 161.4856, "eval_samples_per_second": 30.337, "eval_steps_per_second": 0.954, "eval_wer": 0.8890630913121047, "step": 6700 }, { "epoch": 29.56, "eval_loss": 2.7014529705047607, "eval_runtime": 161.2779, "eval_samples_per_second": 30.376, "eval_steps_per_second": 0.955, "eval_wer": 0.8896577823430827, "step": 6800 }, { "epoch": 30.0, "eval_loss": 2.711408853530884, "eval_runtime": 161.2987, "eval_samples_per_second": 30.372, "eval_steps_per_second": 0.955, "eval_wer": 0.8907931015840407, "step": 6900 }, { "epoch": 30.0, "step": 6900, "total_flos": 4.4600972514253696e+18, "train_loss": 0.9629449507118999, "train_runtime": 26035.0137, "train_samples_per_second": 16.973, "train_steps_per_second": 0.265 } ], "max_steps": 6900, "num_train_epochs": 30, "total_flos": 4.4600972514253696e+18, "trial_name": null, "trial_params": null }