{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 5100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.96, "learning_rate": 7.5e-05, "loss": 1.2991, "step": 100 }, { "epoch": 1.96, "eval_loss": 0.9768741130828857, "eval_runtime": 13.5978, "eval_samples_per_second": 25.078, "eval_steps_per_second": 3.162, "eval_wer": 0.6627306273062731, "step": 100 }, { "epoch": 3.92, "learning_rate": 7.35e-05, "loss": 1.3415, "step": 200 }, { "epoch": 3.92, "eval_loss": 0.9700897932052612, "eval_runtime": 13.6932, "eval_samples_per_second": 24.903, "eval_steps_per_second": 3.14, "eval_wer": 0.6594095940959409, "step": 200 }, { "epoch": 5.88, "learning_rate": 7.199999999999999e-05, "loss": 1.2998, "step": 300 }, { "epoch": 5.88, "eval_loss": 0.9677999019622803, "eval_runtime": 13.5745, "eval_samples_per_second": 25.121, "eval_steps_per_second": 3.168, "eval_wer": 0.6667896678966789, "step": 300 }, { "epoch": 7.84, "learning_rate": 7.049999999999999e-05, "loss": 1.2881, "step": 400 }, { "epoch": 7.84, "eval_loss": 0.9650289416313171, "eval_runtime": 13.184, "eval_samples_per_second": 25.865, "eval_steps_per_second": 3.262, "eval_wer": 0.6612546125461255, "step": 400 }, { "epoch": 9.8, "learning_rate": 6.9e-05, "loss": 1.2369, "step": 500 }, { "epoch": 9.8, "eval_loss": 0.9391834735870361, "eval_runtime": 13.3537, "eval_samples_per_second": 25.536, "eval_steps_per_second": 3.22, "eval_wer": 0.6501845018450184, "step": 500 }, { "epoch": 11.76, "learning_rate": 6.75e-05, "loss": 1.2293, "step": 600 }, { "epoch": 11.76, "eval_loss": 0.953644335269928, "eval_runtime": 13.3175, "eval_samples_per_second": 25.605, "eval_steps_per_second": 3.229, "eval_wer": 0.647970479704797, "step": 600 }, { "epoch": 13.73, "learning_rate": 6.599999999999999e-05, "loss": 1.1709, "step": 700 }, { "epoch": 13.73, "eval_loss": 0.9265403151512146, "eval_runtime": 13.1912, "eval_samples_per_second": 25.851, "eval_steps_per_second": 3.26, "eval_wer": 0.6402214022140221, "step": 700 }, { "epoch": 15.69, "learning_rate": 6.45e-05, "loss": 1.1492, "step": 800 }, { "epoch": 15.69, "eval_loss": 0.9636203646659851, "eval_runtime": 13.2659, "eval_samples_per_second": 25.705, "eval_steps_per_second": 3.241, "eval_wer": 0.6505535055350553, "step": 800 }, { "epoch": 17.65, "learning_rate": 6.299999999999999e-05, "loss": 1.1044, "step": 900 }, { "epoch": 17.65, "eval_loss": 0.9304668307304382, "eval_runtime": 13.2356, "eval_samples_per_second": 25.764, "eval_steps_per_second": 3.249, "eval_wer": 0.6350553505535055, "step": 900 }, { "epoch": 19.61, "learning_rate": 6.149999999999999e-05, "loss": 1.0704, "step": 1000 }, { "epoch": 19.61, "eval_loss": 0.9329050779342651, "eval_runtime": 14.4895, "eval_samples_per_second": 23.534, "eval_steps_per_second": 2.968, "eval_wer": 0.6280442804428045, "step": 1000 }, { "epoch": 21.57, "learning_rate": 5.9999999999999995e-05, "loss": 1.0039, "step": 1100 }, { "epoch": 21.57, "eval_loss": 0.9413442015647888, "eval_runtime": 13.1291, "eval_samples_per_second": 25.973, "eval_steps_per_second": 3.275, "eval_wer": 0.629520295202952, "step": 1100 }, { "epoch": 23.53, "learning_rate": 5.85e-05, "loss": 0.9756, "step": 1200 }, { "epoch": 23.53, "eval_loss": 0.9717814326286316, "eval_runtime": 13.2074, "eval_samples_per_second": 25.819, "eval_steps_per_second": 3.256, "eval_wer": 0.618450184501845, "step": 1200 }, { "epoch": 25.49, "learning_rate": 5.7014999999999995e-05, "loss": 0.9633, "step": 1300 }, { "epoch": 25.49, "eval_loss": 0.9730961918830872, "eval_runtime": 14.0128, "eval_samples_per_second": 24.335, "eval_steps_per_second": 3.069, "eval_wer": 0.6132841328413284, "step": 1300 }, { "epoch": 27.45, "learning_rate": 5.551499999999999e-05, "loss": 0.932, "step": 1400 }, { "epoch": 27.45, "eval_loss": 0.9659466743469238, "eval_runtime": 13.4417, "eval_samples_per_second": 25.369, "eval_steps_per_second": 3.199, "eval_wer": 0.6199261992619927, "step": 1400 }, { "epoch": 29.41, "learning_rate": 5.401499999999999e-05, "loss": 0.9252, "step": 1500 }, { "epoch": 29.41, "eval_loss": 0.9766208529472351, "eval_runtime": 13.236, "eval_samples_per_second": 25.763, "eval_steps_per_second": 3.249, "eval_wer": 0.6195571955719558, "step": 1500 }, { "epoch": 31.37, "learning_rate": 5.2515e-05, "loss": 0.9172, "step": 1600 }, { "epoch": 31.37, "eval_loss": 1.005155086517334, "eval_runtime": 13.4612, "eval_samples_per_second": 25.332, "eval_steps_per_second": 3.194, "eval_wer": 0.6199261992619927, "step": 1600 }, { "epoch": 33.33, "learning_rate": 5.1015e-05, "loss": 0.8733, "step": 1700 }, { "epoch": 33.33, "eval_loss": 0.9955308437347412, "eval_runtime": 13.2228, "eval_samples_per_second": 25.789, "eval_steps_per_second": 3.252, "eval_wer": 0.6202952029520296, "step": 1700 }, { "epoch": 35.29, "learning_rate": 4.9514999999999996e-05, "loss": 0.868, "step": 1800 }, { "epoch": 35.29, "eval_loss": 1.0068942308425903, "eval_runtime": 13.1716, "eval_samples_per_second": 25.889, "eval_steps_per_second": 3.265, "eval_wer": 0.6239852398523985, "step": 1800 }, { "epoch": 37.25, "learning_rate": 4.8014999999999993e-05, "loss": 0.8547, "step": 1900 }, { "epoch": 37.25, "eval_loss": 0.9782727360725403, "eval_runtime": 13.4477, "eval_samples_per_second": 25.357, "eval_steps_per_second": 3.198, "eval_wer": 0.625830258302583, "step": 1900 }, { "epoch": 39.22, "learning_rate": 4.651499999999999e-05, "loss": 0.8451, "step": 2000 }, { "epoch": 39.22, "eval_loss": 0.9844875931739807, "eval_runtime": 13.5311, "eval_samples_per_second": 25.201, "eval_steps_per_second": 3.178, "eval_wer": 0.6051660516605166, "step": 2000 }, { "epoch": 41.18, "learning_rate": 4.5014999999999995e-05, "loss": 0.8374, "step": 2100 }, { "epoch": 41.18, "eval_loss": 0.9495627880096436, "eval_runtime": 13.1208, "eval_samples_per_second": 25.989, "eval_steps_per_second": 3.277, "eval_wer": 0.6136531365313653, "step": 2100 }, { "epoch": 43.14, "learning_rate": 4.353e-05, "loss": 0.8153, "step": 2200 }, { "epoch": 43.14, "eval_loss": 0.9756118655204773, "eval_runtime": 13.2792, "eval_samples_per_second": 25.679, "eval_steps_per_second": 3.238, "eval_wer": 0.6121771217712177, "step": 2200 }, { "epoch": 45.1, "learning_rate": 4.2029999999999996e-05, "loss": 0.8134, "step": 2300 }, { "epoch": 45.1, "eval_loss": 0.9711871147155762, "eval_runtime": 13.4607, "eval_samples_per_second": 25.333, "eval_steps_per_second": 3.194, "eval_wer": 0.6095940959409594, "step": 2300 }, { "epoch": 47.06, "learning_rate": 4.052999999999999e-05, "loss": 0.8019, "step": 2400 }, { "epoch": 47.06, "eval_loss": 0.9564995765686035, "eval_runtime": 13.1971, "eval_samples_per_second": 25.839, "eval_steps_per_second": 3.258, "eval_wer": 0.5970479704797048, "step": 2400 }, { "epoch": 49.02, "learning_rate": 3.903e-05, "loss": 0.7746, "step": 2500 }, { "epoch": 49.02, "eval_loss": 0.9864395260810852, "eval_runtime": 13.2769, "eval_samples_per_second": 25.684, "eval_steps_per_second": 3.239, "eval_wer": 0.6095940959409594, "step": 2500 }, { "epoch": 50.98, "learning_rate": 3.7529999999999995e-05, "loss": 0.7664, "step": 2600 }, { "epoch": 50.98, "eval_loss": 0.9988436698913574, "eval_runtime": 13.1392, "eval_samples_per_second": 25.953, "eval_steps_per_second": 3.273, "eval_wer": 0.6092250922509225, "step": 2600 }, { "epoch": 52.94, "learning_rate": 3.603e-05, "loss": 0.7708, "step": 2700 }, { "epoch": 52.94, "eval_loss": 1.0180705785751343, "eval_runtime": 13.5466, "eval_samples_per_second": 25.172, "eval_steps_per_second": 3.174, "eval_wer": 0.6254612546125461, "step": 2700 }, { "epoch": 54.9, "learning_rate": 3.4529999999999996e-05, "loss": 0.7468, "step": 2800 }, { "epoch": 54.9, "eval_loss": 0.9917659759521484, "eval_runtime": 13.328, "eval_samples_per_second": 25.585, "eval_steps_per_second": 3.226, "eval_wer": 0.614760147601476, "step": 2800 }, { "epoch": 56.86, "learning_rate": 3.303e-05, "loss": 0.7241, "step": 2900 }, { "epoch": 56.86, "eval_loss": 1.015049934387207, "eval_runtime": 13.1895, "eval_samples_per_second": 25.854, "eval_steps_per_second": 3.26, "eval_wer": 0.6018450184501845, "step": 2900 }, { "epoch": 58.82, "learning_rate": 3.153e-05, "loss": 0.7165, "step": 3000 }, { "epoch": 58.82, "eval_loss": 1.0438742637634277, "eval_runtime": 13.2139, "eval_samples_per_second": 25.806, "eval_steps_per_second": 3.254, "eval_wer": 0.6062730627306273, "step": 3000 }, { "epoch": 60.78, "learning_rate": 3.0029999999999995e-05, "loss": 0.7104, "step": 3100 }, { "epoch": 60.78, "eval_loss": 1.0015809535980225, "eval_runtime": 13.0489, "eval_samples_per_second": 26.133, "eval_steps_per_second": 3.295, "eval_wer": 0.603690036900369, "step": 3100 }, { "epoch": 62.75, "learning_rate": 2.853e-05, "loss": 0.6954, "step": 3200 }, { "epoch": 62.75, "eval_loss": 1.0116804838180542, "eval_runtime": 13.1802, "eval_samples_per_second": 25.872, "eval_steps_per_second": 3.262, "eval_wer": 0.5970479704797048, "step": 3200 }, { "epoch": 64.71, "learning_rate": 2.7029999999999997e-05, "loss": 0.6753, "step": 3300 }, { "epoch": 64.71, "eval_loss": 1.019060492515564, "eval_runtime": 13.2987, "eval_samples_per_second": 25.642, "eval_steps_per_second": 3.233, "eval_wer": 0.603690036900369, "step": 3300 }, { "epoch": 66.67, "learning_rate": 2.5529999999999998e-05, "loss": 0.6803, "step": 3400 }, { "epoch": 66.67, "eval_loss": 1.0190043449401855, "eval_runtime": 13.4724, "eval_samples_per_second": 25.311, "eval_steps_per_second": 3.192, "eval_wer": 0.6033210332103321, "step": 3400 }, { "epoch": 68.63, "learning_rate": 2.403e-05, "loss": 0.661, "step": 3500 }, { "epoch": 68.63, "eval_loss": 1.0283905267715454, "eval_runtime": 13.7543, "eval_samples_per_second": 24.792, "eval_steps_per_second": 3.126, "eval_wer": 0.6007380073800738, "step": 3500 }, { "epoch": 70.59, "learning_rate": 2.253e-05, "loss": 0.6597, "step": 3600 }, { "epoch": 70.59, "eval_loss": 1.0060473680496216, "eval_runtime": 13.2986, "eval_samples_per_second": 25.642, "eval_steps_per_second": 3.233, "eval_wer": 0.5966789667896679, "step": 3600 }, { "epoch": 72.55, "learning_rate": 2.1029999999999997e-05, "loss": 0.6398, "step": 3700 }, { "epoch": 72.55, "eval_loss": 1.0372449159622192, "eval_runtime": 13.4322, "eval_samples_per_second": 25.387, "eval_steps_per_second": 3.201, "eval_wer": 0.6047970479704797, "step": 3700 }, { "epoch": 74.51, "learning_rate": 1.953e-05, "loss": 0.6105, "step": 3800 }, { "epoch": 74.51, "eval_loss": 1.0047756433486938, "eval_runtime": 13.2181, "eval_samples_per_second": 25.798, "eval_steps_per_second": 3.253, "eval_wer": 0.6044280442804428, "step": 3800 }, { "epoch": 76.47, "learning_rate": 1.803e-05, "loss": 0.6164, "step": 3900 }, { "epoch": 76.47, "eval_loss": 1.0398120880126953, "eval_runtime": 13.4893, "eval_samples_per_second": 25.279, "eval_steps_per_second": 3.188, "eval_wer": 0.614760147601476, "step": 3900 }, { "epoch": 78.43, "learning_rate": 1.653e-05, "loss": 0.6354, "step": 4000 }, { "epoch": 78.43, "eval_loss": 1.0271726846694946, "eval_runtime": 13.2868, "eval_samples_per_second": 25.665, "eval_steps_per_second": 3.236, "eval_wer": 0.6132841328413284, "step": 4000 }, { "epoch": 80.39, "learning_rate": 1.5029999999999998e-05, "loss": 0.5952, "step": 4100 }, { "epoch": 80.39, "eval_loss": 1.0364222526550293, "eval_runtime": 13.255, "eval_samples_per_second": 25.726, "eval_steps_per_second": 3.244, "eval_wer": 0.6081180811808118, "step": 4100 }, { "epoch": 82.35, "learning_rate": 1.353e-05, "loss": 0.5814, "step": 4200 }, { "epoch": 82.35, "eval_loss": 1.0418034791946411, "eval_runtime": 13.1269, "eval_samples_per_second": 25.977, "eval_steps_per_second": 3.276, "eval_wer": 0.6092250922509225, "step": 4200 }, { "epoch": 84.31, "learning_rate": 1.2029999999999998e-05, "loss": 0.6079, "step": 4300 }, { "epoch": 84.31, "eval_loss": 1.0277141332626343, "eval_runtime": 13.3962, "eval_samples_per_second": 25.455, "eval_steps_per_second": 3.21, "eval_wer": 0.5966789667896679, "step": 4300 }, { "epoch": 86.27, "learning_rate": 1.0529999999999999e-05, "loss": 0.5748, "step": 4400 }, { "epoch": 86.27, "eval_loss": 1.03615140914917, "eval_runtime": 13.3767, "eval_samples_per_second": 25.492, "eval_steps_per_second": 3.215, "eval_wer": 0.6040590405904059, "step": 4400 }, { "epoch": 88.24, "learning_rate": 9.029999999999998e-06, "loss": 0.5624, "step": 4500 }, { "epoch": 88.24, "eval_loss": 1.042688250541687, "eval_runtime": 13.1572, "eval_samples_per_second": 25.917, "eval_steps_per_second": 3.268, "eval_wer": 0.6007380073800738, "step": 4500 }, { "epoch": 90.2, "learning_rate": 7.53e-06, "loss": 0.5767, "step": 4600 }, { "epoch": 90.2, "eval_loss": 1.037009835243225, "eval_runtime": 13.3232, "eval_samples_per_second": 25.594, "eval_steps_per_second": 3.227, "eval_wer": 0.5918819188191882, "step": 4600 }, { "epoch": 92.16, "learning_rate": 6.029999999999999e-06, "loss": 0.5793, "step": 4700 }, { "epoch": 92.16, "eval_loss": 1.0441827774047852, "eval_runtime": 13.4001, "eval_samples_per_second": 25.447, "eval_steps_per_second": 3.209, "eval_wer": 0.6011070110701107, "step": 4700 }, { "epoch": 94.12, "learning_rate": 4.53e-06, "loss": 0.547, "step": 4800 }, { "epoch": 94.12, "eval_loss": 1.0516060590744019, "eval_runtime": 13.1503, "eval_samples_per_second": 25.931, "eval_steps_per_second": 3.27, "eval_wer": 0.5981549815498155, "step": 4800 }, { "epoch": 96.08, "learning_rate": 3.03e-06, "loss": 0.5513, "step": 4900 }, { "epoch": 96.08, "eval_loss": 1.0460669994354248, "eval_runtime": 13.2162, "eval_samples_per_second": 25.802, "eval_steps_per_second": 3.254, "eval_wer": 0.5988929889298893, "step": 4900 }, { "epoch": 98.04, "learning_rate": 1.53e-06, "loss": 0.5429, "step": 5000 }, { "epoch": 98.04, "eval_loss": 1.0503703355789185, "eval_runtime": 13.1041, "eval_samples_per_second": 26.022, "eval_steps_per_second": 3.281, "eval_wer": 0.5996309963099631, "step": 5000 }, { "epoch": 100.0, "learning_rate": 3e-08, "loss": 0.5404, "step": 5100 }, { "epoch": 100.0, "eval_loss": 1.0516693592071533, "eval_runtime": 13.1507, "eval_samples_per_second": 25.93, "eval_steps_per_second": 3.27, "eval_wer": 0.5966789667896679, "step": 5100 }, { "epoch": 100.0, "step": 5100, "total_flos": 9.838577578075728e+18, "train_loss": 0.8227130358826881, "train_runtime": 4841.344, "train_samples_per_second": 16.731, "train_steps_per_second": 1.053 } ], "max_steps": 5100, "num_train_epochs": 100, "total_flos": 9.838577578075728e+18, "trial_name": null, "trial_params": null }