{ "best_metric": null, "best_model_checkpoint": null, "epoch": 59.354838709677416, "global_step": 18400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.29, "learning_rate": 0.00023999999999999998, "loss": 7.1936, "step": 400 }, { "epoch": 1.29, "eval_loss": 3.278684139251709, "eval_runtime": 123.6406, "eval_samples_per_second": 12.31, "eval_wer": 1.0, "step": 400 }, { "epoch": 2.58, "learning_rate": 0.0002950276243093923, "loss": 3.1985, "step": 800 }, { "epoch": 2.58, "eval_loss": 2.8315000534057617, "eval_runtime": 121.9578, "eval_samples_per_second": 12.48, "eval_wer": 0.9966690219037044, "step": 800 }, { "epoch": 3.87, "learning_rate": 0.00028839779005524857, "loss": 1.1566, "step": 1200 }, { "epoch": 3.87, "eval_loss": 0.7793326377868652, "eval_runtime": 113.9019, "eval_samples_per_second": 13.362, "eval_wer": 0.7725850408801858, "step": 1200 }, { "epoch": 5.16, "learning_rate": 0.00028176795580110497, "loss": 0.5297, "step": 1600 }, { "epoch": 5.16, "eval_loss": 0.6474416255950928, "eval_runtime": 122.7887, "eval_samples_per_second": 12.395, "eval_wer": 0.6751791662460886, "step": 1600 }, { "epoch": 6.45, "learning_rate": 0.0002751381215469613, "loss": 0.3725, "step": 2000 }, { "epoch": 6.45, "eval_loss": 0.5674485564231873, "eval_runtime": 114.3739, "eval_samples_per_second": 13.307, "eval_wer": 0.6073483395578884, "step": 2000 }, { "epoch": 7.74, "learning_rate": 0.00026850828729281767, "loss": 0.2877, "step": 2400 }, { "epoch": 7.74, "eval_loss": 0.5630490183830261, "eval_runtime": 114.404, "eval_samples_per_second": 13.304, "eval_wer": 0.5621277884324215, "step": 2400 }, { "epoch": 9.03, "learning_rate": 0.000261878453038674, "loss": 0.2342, "step": 2800 }, { "epoch": 9.03, "eval_loss": 0.5479915142059326, "eval_runtime": 123.0266, "eval_samples_per_second": 12.371, "eval_wer": 0.5403250227112143, "step": 2800 }, { "epoch": 10.32, "learning_rate": 0.00025524861878453036, "loss": 0.1967, "step": 3200 }, { "epoch": 10.32, "eval_loss": 0.6276335120201111, "eval_runtime": 114.8683, "eval_samples_per_second": 13.25, "eval_wer": 0.5577874230342182, "step": 3200 }, { "epoch": 11.61, "learning_rate": 0.0002486187845303867, "loss": 0.1729, "step": 3600 }, { "epoch": 11.61, "eval_loss": 0.536430835723877, "eval_runtime": 114.6009, "eval_samples_per_second": 13.281, "eval_wer": 0.5332593115978601, "step": 3600 }, { "epoch": 12.9, "learning_rate": 0.00024198895027624309, "loss": 0.1607, "step": 4000 }, { "epoch": 12.9, "eval_loss": 0.5107040405273438, "eval_runtime": 115.3852, "eval_samples_per_second": 13.191, "eval_wer": 0.495306349046129, "step": 4000 }, { "epoch": 14.19, "learning_rate": 0.00023535911602209943, "loss": 0.1381, "step": 4400 }, { "epoch": 14.19, "eval_loss": 0.5929794907569885, "eval_runtime": 123.4194, "eval_samples_per_second": 12.332, "eval_wer": 0.5220551125466841, "step": 4400 }, { "epoch": 15.48, "learning_rate": 0.0002287292817679558, "loss": 0.1261, "step": 4800 }, { "epoch": 15.48, "eval_loss": 0.6632552742958069, "eval_runtime": 124.1096, "eval_samples_per_second": 12.263, "eval_wer": 0.5305339658827092, "step": 4800 }, { "epoch": 16.77, "learning_rate": 0.00022209944751381213, "loss": 0.1216, "step": 5200 }, { "epoch": 16.77, "eval_loss": 0.6114311218261719, "eval_runtime": 116.8858, "eval_samples_per_second": 13.021, "eval_wer": 0.5084283839709296, "step": 5200 }, { "epoch": 18.06, "learning_rate": 0.0002154696132596685, "loss": 0.1119, "step": 5600 }, { "epoch": 18.06, "eval_loss": 0.6091165542602539, "eval_runtime": 123.4244, "eval_samples_per_second": 12.331, "eval_wer": 0.5041889573029171, "step": 5600 }, { "epoch": 19.35, "learning_rate": 0.00020883977900552485, "loss": 0.107, "step": 6000 }, { "epoch": 19.35, "eval_loss": 0.5811592936515808, "eval_runtime": 131.8655, "eval_samples_per_second": 11.542, "eval_wer": 0.49581104269708287, "step": 6000 }, { "epoch": 20.65, "learning_rate": 0.00020220994475138123, "loss": 0.0985, "step": 6400 }, { "epoch": 20.65, "eval_loss": 0.6538776755332947, "eval_runtime": 132.6579, "eval_samples_per_second": 11.473, "eval_wer": 0.5082265065105481, "step": 6400 }, { "epoch": 21.94, "learning_rate": 0.00019558011049723755, "loss": 0.0959, "step": 6800 }, { "epoch": 21.94, "eval_loss": 0.602001965045929, "eval_runtime": 132.2249, "eval_samples_per_second": 11.511, "eval_wer": 0.4906631674573534, "step": 6800 }, { "epoch": 23.23, "learning_rate": 0.0001889502762430939, "loss": 0.0899, "step": 7200 }, { "epoch": 23.23, "eval_loss": 0.6006932854652405, "eval_runtime": 115.0032, "eval_samples_per_second": 13.234, "eval_wer": 0.48813969920258404, "step": 7200 }, { "epoch": 24.52, "learning_rate": 0.00018232044198895027, "loss": 0.0867, "step": 7600 }, { "epoch": 24.52, "eval_loss": 0.5921047329902649, "eval_runtime": 115.0629, "eval_samples_per_second": 13.228, "eval_wer": 0.48763500555163014, "step": 7600 }, { "epoch": 25.81, "learning_rate": 0.0001756906077348066, "loss": 0.0825, "step": 8000 }, { "epoch": 25.81, "eval_loss": 0.6151732802391052, "eval_runtime": 115.0579, "eval_samples_per_second": 13.228, "eval_wer": 0.4937922680932674, "step": 8000 }, { "epoch": 27.1, "learning_rate": 0.00016906077348066297, "loss": 0.0768, "step": 8400 }, { "epoch": 27.1, "eval_loss": 0.6089133024215698, "eval_runtime": 123.8125, "eval_samples_per_second": 12.293, "eval_wer": 0.47723831634198044, "step": 8400 }, { "epoch": 28.39, "learning_rate": 0.00016243093922651931, "loss": 0.0698, "step": 8800 }, { "epoch": 28.39, "eval_loss": 0.6180127263069153, "eval_runtime": 123.6525, "eval_samples_per_second": 12.309, "eval_wer": 0.47330170586454023, "step": 8800 }, { "epoch": 29.68, "learning_rate": 0.0001558011049723757, "loss": 0.0753, "step": 9200 }, { "epoch": 29.68, "eval_loss": 0.621383547782898, "eval_runtime": 115.3106, "eval_samples_per_second": 13.199, "eval_wer": 0.4866256182497224, "step": 9200 }, { "epoch": 30.97, "learning_rate": 0.00014917127071823204, "loss": 0.0674, "step": 9600 }, { "epoch": 30.97, "eval_loss": 0.6383547782897949, "eval_runtime": 123.6323, "eval_samples_per_second": 12.311, "eval_wer": 0.4813768042798022, "step": 9600 }, { "epoch": 32.26, "learning_rate": 0.00014254143646408839, "loss": 0.0617, "step": 10000 }, { "epoch": 32.26, "eval_loss": 0.6684080362319946, "eval_runtime": 122.6948, "eval_samples_per_second": 12.405, "eval_wer": 0.47673362269102654, "step": 10000 }, { "epoch": 33.55, "learning_rate": 0.00013591160220994473, "loss": 0.059, "step": 10400 }, { "epoch": 33.55, "eval_loss": 0.6545931696891785, "eval_runtime": 115.3877, "eval_samples_per_second": 13.19, "eval_wer": 0.46734632078328453, "step": 10400 }, { "epoch": 34.84, "learning_rate": 0.0001292817679558011, "loss": 0.0598, "step": 10800 }, { "epoch": 34.84, "eval_loss": 0.6368861794471741, "eval_runtime": 115.1488, "eval_samples_per_second": 13.218, "eval_wer": 0.4668416271323307, "step": 10800 }, { "epoch": 36.13, "learning_rate": 0.00012265193370165746, "loss": 0.0558, "step": 11200 }, { "epoch": 36.13, "eval_loss": 0.6463531851768494, "eval_runtime": 115.1323, "eval_samples_per_second": 13.22, "eval_wer": 0.4656303623700414, "step": 11200 }, { "epoch": 37.42, "learning_rate": 0.0001160220994475138, "loss": 0.0568, "step": 11600 }, { "epoch": 37.42, "eval_loss": 0.6061355471611023, "eval_runtime": 115.4845, "eval_samples_per_second": 13.179, "eval_wer": 0.46966791157767235, "step": 11600 }, { "epoch": 38.71, "learning_rate": 0.00010939226519337017, "loss": 0.05, "step": 12000 }, { "epoch": 38.71, "eval_loss": 0.6069867014884949, "eval_runtime": 115.3997, "eval_samples_per_second": 13.189, "eval_wer": 0.46381346522660744, "step": 12000 }, { "epoch": 40.0, "learning_rate": 0.00010276243093922651, "loss": 0.0489, "step": 12400 }, { "epoch": 40.0, "eval_loss": 0.6435591578483582, "eval_runtime": 115.2889, "eval_samples_per_second": 13.202, "eval_wer": 0.45493085696981933, "step": 12400 }, { "epoch": 41.29, "learning_rate": 9.613259668508287e-05, "loss": 0.0508, "step": 12800 }, { "epoch": 41.29, "eval_loss": 0.6377198696136475, "eval_runtime": 115.7131, "eval_samples_per_second": 13.153, "eval_wer": 0.45493085696981933, "step": 12800 }, { "epoch": 42.58, "learning_rate": 8.950276243093922e-05, "loss": 0.0475, "step": 13200 }, { "epoch": 42.58, "eval_loss": 0.6290169954299927, "eval_runtime": 115.5463, "eval_samples_per_second": 13.172, "eval_wer": 0.45836277379630563, "step": 13200 }, { "epoch": 43.87, "learning_rate": 8.287292817679558e-05, "loss": 0.0415, "step": 13600 }, { "epoch": 43.87, "eval_loss": 0.6694880723953247, "eval_runtime": 115.6201, "eval_samples_per_second": 13.164, "eval_wer": 0.45291208236600383, "step": 13600 }, { "epoch": 45.16, "learning_rate": 7.624309392265193e-05, "loss": 0.0437, "step": 14000 }, { "epoch": 45.16, "eval_loss": 0.5929790139198303, "eval_runtime": 115.6043, "eval_samples_per_second": 13.166, "eval_wer": 0.44594730998284043, "step": 14000 }, { "epoch": 46.45, "learning_rate": 6.961325966850828e-05, "loss": 0.0405, "step": 14400 }, { "epoch": 46.45, "eval_loss": 0.5978022813796997, "eval_runtime": 115.5674, "eval_samples_per_second": 13.17, "eval_wer": 0.44857171696780057, "step": 14400 }, { "epoch": 47.74, "learning_rate": 6.298342541436463e-05, "loss": 0.0373, "step": 14800 }, { "epoch": 47.74, "eval_loss": 0.6167137622833252, "eval_runtime": 129.2696, "eval_samples_per_second": 11.774, "eval_wer": 0.4487735944281821, "step": 14800 }, { "epoch": 49.03, "learning_rate": 5.635359116022099e-05, "loss": 0.0351, "step": 15200 }, { "epoch": 49.03, "eval_loss": 0.6160412430763245, "eval_runtime": 132.6569, "eval_samples_per_second": 11.473, "eval_wer": 0.4417078833148279, "step": 15200 }, { "epoch": 50.32, "learning_rate": 4.9723756906077343e-05, "loss": 0.0373, "step": 15600 }, { "epoch": 50.32, "eval_loss": 0.5868551731109619, "eval_runtime": 116.8754, "eval_samples_per_second": 13.022, "eval_wer": 0.4385787826789139, "step": 15600 }, { "epoch": 51.61, "learning_rate": 4.30939226519337e-05, "loss": 0.0346, "step": 16000 }, { "epoch": 51.61, "eval_loss": 0.6198846101760864, "eval_runtime": 124.1832, "eval_samples_per_second": 12.256, "eval_wer": 0.4357524982335722, "step": 16000 }, { "epoch": 52.9, "learning_rate": 3.646408839779005e-05, "loss": 0.0328, "step": 16400 }, { "epoch": 52.9, "eval_loss": 0.6003885269165039, "eval_runtime": 124.3773, "eval_samples_per_second": 12.237, "eval_wer": 0.43999192490158473, "step": 16400 }, { "epoch": 54.19, "learning_rate": 2.9834254143646404e-05, "loss": 0.0315, "step": 16800 }, { "epoch": 54.19, "eval_loss": 0.6082204580307007, "eval_runtime": 124.1939, "eval_samples_per_second": 12.255, "eval_wer": 0.4374684566468154, "step": 16800 }, { "epoch": 55.48, "learning_rate": 2.320441988950276e-05, "loss": 0.0312, "step": 17200 }, { "epoch": 55.48, "eval_loss": 0.6157354116439819, "eval_runtime": 124.1732, "eval_samples_per_second": 12.257, "eval_wer": 0.4346421722014737, "step": 17200 }, { "epoch": 56.77, "learning_rate": 1.6574585635359113e-05, "loss": 0.031, "step": 17600 }, { "epoch": 56.77, "eval_loss": 0.6112752556800842, "eval_runtime": 125.7635, "eval_samples_per_second": 12.102, "eval_wer": 0.4303018068032704, "step": 17600 }, { "epoch": 58.06, "learning_rate": 9.944751381215468e-06, "loss": 0.0296, "step": 18000 }, { "epoch": 58.06, "eval_loss": 0.6031031608581543, "eval_runtime": 125.68, "eval_samples_per_second": 12.11, "eval_wer": 0.4288886645805996, "step": 18000 }, { "epoch": 59.35, "learning_rate": 3.3149171270718227e-06, "loss": 0.0287, "step": 18400 }, { "epoch": 59.35, "eval_loss": 0.6061920523643494, "eval_runtime": 124.5102, "eval_samples_per_second": 12.224, "eval_wer": 0.428686787120218, "step": 18400 } ], "max_steps": 18600, "num_train_epochs": 60, "total_flos": 2.8159583015375258e+19, "trial_name": null, "trial_params": null }