|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 59.354838709677416, |
|
"global_step": 18400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 7.1936, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 3.278684139251709, |
|
"eval_runtime": 123.6406, |
|
"eval_samples_per_second": 12.31, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.0002950276243093923, |
|
"loss": 3.1985, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_loss": 2.8315000534057617, |
|
"eval_runtime": 121.9578, |
|
"eval_samples_per_second": 12.48, |
|
"eval_wer": 0.9966690219037044, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.00028839779005524857, |
|
"loss": 1.1566, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 0.7793326377868652, |
|
"eval_runtime": 113.9019, |
|
"eval_samples_per_second": 13.362, |
|
"eval_wer": 0.7725850408801858, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.00028176795580110497, |
|
"loss": 0.5297, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"eval_loss": 0.6474416255950928, |
|
"eval_runtime": 122.7887, |
|
"eval_samples_per_second": 12.395, |
|
"eval_wer": 0.6751791662460886, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.0002751381215469613, |
|
"loss": 0.3725, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"eval_loss": 0.5674485564231873, |
|
"eval_runtime": 114.3739, |
|
"eval_samples_per_second": 13.307, |
|
"eval_wer": 0.6073483395578884, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.00026850828729281767, |
|
"loss": 0.2877, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"eval_loss": 0.5630490183830261, |
|
"eval_runtime": 114.404, |
|
"eval_samples_per_second": 13.304, |
|
"eval_wer": 0.5621277884324215, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 0.000261878453038674, |
|
"loss": 0.2342, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_loss": 0.5479915142059326, |
|
"eval_runtime": 123.0266, |
|
"eval_samples_per_second": 12.371, |
|
"eval_wer": 0.5403250227112143, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 0.00025524861878453036, |
|
"loss": 0.1967, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"eval_loss": 0.6276335120201111, |
|
"eval_runtime": 114.8683, |
|
"eval_samples_per_second": 13.25, |
|
"eval_wer": 0.5577874230342182, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 0.0002486187845303867, |
|
"loss": 0.1729, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"eval_loss": 0.536430835723877, |
|
"eval_runtime": 114.6009, |
|
"eval_samples_per_second": 13.281, |
|
"eval_wer": 0.5332593115978601, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 0.00024198895027624309, |
|
"loss": 0.1607, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_loss": 0.5107040405273438, |
|
"eval_runtime": 115.3852, |
|
"eval_samples_per_second": 13.191, |
|
"eval_wer": 0.495306349046129, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 0.00023535911602209943, |
|
"loss": 0.1381, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"eval_loss": 0.5929794907569885, |
|
"eval_runtime": 123.4194, |
|
"eval_samples_per_second": 12.332, |
|
"eval_wer": 0.5220551125466841, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 0.0002287292817679558, |
|
"loss": 0.1261, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"eval_loss": 0.6632552742958069, |
|
"eval_runtime": 124.1096, |
|
"eval_samples_per_second": 12.263, |
|
"eval_wer": 0.5305339658827092, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 0.00022209944751381213, |
|
"loss": 0.1216, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"eval_loss": 0.6114311218261719, |
|
"eval_runtime": 116.8858, |
|
"eval_samples_per_second": 13.021, |
|
"eval_wer": 0.5084283839709296, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"learning_rate": 0.0002154696132596685, |
|
"loss": 0.1119, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"eval_loss": 0.6091165542602539, |
|
"eval_runtime": 123.4244, |
|
"eval_samples_per_second": 12.331, |
|
"eval_wer": 0.5041889573029171, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 0.00020883977900552485, |
|
"loss": 0.107, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"eval_loss": 0.5811592936515808, |
|
"eval_runtime": 131.8655, |
|
"eval_samples_per_second": 11.542, |
|
"eval_wer": 0.49581104269708287, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"learning_rate": 0.00020220994475138123, |
|
"loss": 0.0985, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"eval_loss": 0.6538776755332947, |
|
"eval_runtime": 132.6579, |
|
"eval_samples_per_second": 11.473, |
|
"eval_wer": 0.5082265065105481, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"learning_rate": 0.00019558011049723755, |
|
"loss": 0.0959, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_loss": 0.602001965045929, |
|
"eval_runtime": 132.2249, |
|
"eval_samples_per_second": 11.511, |
|
"eval_wer": 0.4906631674573534, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 0.0001889502762430939, |
|
"loss": 0.0899, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"eval_loss": 0.6006932854652405, |
|
"eval_runtime": 115.0032, |
|
"eval_samples_per_second": 13.234, |
|
"eval_wer": 0.48813969920258404, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"learning_rate": 0.00018232044198895027, |
|
"loss": 0.0867, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"eval_loss": 0.5921047329902649, |
|
"eval_runtime": 115.0629, |
|
"eval_samples_per_second": 13.228, |
|
"eval_wer": 0.48763500555163014, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"learning_rate": 0.0001756906077348066, |
|
"loss": 0.0825, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"eval_loss": 0.6151732802391052, |
|
"eval_runtime": 115.0579, |
|
"eval_samples_per_second": 13.228, |
|
"eval_wer": 0.4937922680932674, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"learning_rate": 0.00016906077348066297, |
|
"loss": 0.0768, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"eval_loss": 0.6089133024215698, |
|
"eval_runtime": 123.8125, |
|
"eval_samples_per_second": 12.293, |
|
"eval_wer": 0.47723831634198044, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"learning_rate": 0.00016243093922651931, |
|
"loss": 0.0698, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"eval_loss": 0.6180127263069153, |
|
"eval_runtime": 123.6525, |
|
"eval_samples_per_second": 12.309, |
|
"eval_wer": 0.47330170586454023, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 29.68, |
|
"learning_rate": 0.0001558011049723757, |
|
"loss": 0.0753, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 29.68, |
|
"eval_loss": 0.621383547782898, |
|
"eval_runtime": 115.3106, |
|
"eval_samples_per_second": 13.199, |
|
"eval_wer": 0.4866256182497224, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"learning_rate": 0.00014917127071823204, |
|
"loss": 0.0674, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_loss": 0.6383547782897949, |
|
"eval_runtime": 123.6323, |
|
"eval_samples_per_second": 12.311, |
|
"eval_wer": 0.4813768042798022, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"learning_rate": 0.00014254143646408839, |
|
"loss": 0.0617, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"eval_loss": 0.6684080362319946, |
|
"eval_runtime": 122.6948, |
|
"eval_samples_per_second": 12.405, |
|
"eval_wer": 0.47673362269102654, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 33.55, |
|
"learning_rate": 0.00013591160220994473, |
|
"loss": 0.059, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 33.55, |
|
"eval_loss": 0.6545931696891785, |
|
"eval_runtime": 115.3877, |
|
"eval_samples_per_second": 13.19, |
|
"eval_wer": 0.46734632078328453, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 34.84, |
|
"learning_rate": 0.0001292817679558011, |
|
"loss": 0.0598, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 34.84, |
|
"eval_loss": 0.6368861794471741, |
|
"eval_runtime": 115.1488, |
|
"eval_samples_per_second": 13.218, |
|
"eval_wer": 0.4668416271323307, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"learning_rate": 0.00012265193370165746, |
|
"loss": 0.0558, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"eval_loss": 0.6463531851768494, |
|
"eval_runtime": 115.1323, |
|
"eval_samples_per_second": 13.22, |
|
"eval_wer": 0.4656303623700414, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 37.42, |
|
"learning_rate": 0.0001160220994475138, |
|
"loss": 0.0568, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 37.42, |
|
"eval_loss": 0.6061355471611023, |
|
"eval_runtime": 115.4845, |
|
"eval_samples_per_second": 13.179, |
|
"eval_wer": 0.46966791157767235, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 38.71, |
|
"learning_rate": 0.00010939226519337017, |
|
"loss": 0.05, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 38.71, |
|
"eval_loss": 0.6069867014884949, |
|
"eval_runtime": 115.3997, |
|
"eval_samples_per_second": 13.189, |
|
"eval_wer": 0.46381346522660744, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.00010276243093922651, |
|
"loss": 0.0489, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 0.6435591578483582, |
|
"eval_runtime": 115.2889, |
|
"eval_samples_per_second": 13.202, |
|
"eval_wer": 0.45493085696981933, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 41.29, |
|
"learning_rate": 9.613259668508287e-05, |
|
"loss": 0.0508, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 41.29, |
|
"eval_loss": 0.6377198696136475, |
|
"eval_runtime": 115.7131, |
|
"eval_samples_per_second": 13.153, |
|
"eval_wer": 0.45493085696981933, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 42.58, |
|
"learning_rate": 8.950276243093922e-05, |
|
"loss": 0.0475, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 42.58, |
|
"eval_loss": 0.6290169954299927, |
|
"eval_runtime": 115.5463, |
|
"eval_samples_per_second": 13.172, |
|
"eval_wer": 0.45836277379630563, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 43.87, |
|
"learning_rate": 8.287292817679558e-05, |
|
"loss": 0.0415, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 43.87, |
|
"eval_loss": 0.6694880723953247, |
|
"eval_runtime": 115.6201, |
|
"eval_samples_per_second": 13.164, |
|
"eval_wer": 0.45291208236600383, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 45.16, |
|
"learning_rate": 7.624309392265193e-05, |
|
"loss": 0.0437, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 45.16, |
|
"eval_loss": 0.5929790139198303, |
|
"eval_runtime": 115.6043, |
|
"eval_samples_per_second": 13.166, |
|
"eval_wer": 0.44594730998284043, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 46.45, |
|
"learning_rate": 6.961325966850828e-05, |
|
"loss": 0.0405, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 46.45, |
|
"eval_loss": 0.5978022813796997, |
|
"eval_runtime": 115.5674, |
|
"eval_samples_per_second": 13.17, |
|
"eval_wer": 0.44857171696780057, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 47.74, |
|
"learning_rate": 6.298342541436463e-05, |
|
"loss": 0.0373, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 47.74, |
|
"eval_loss": 0.6167137622833252, |
|
"eval_runtime": 129.2696, |
|
"eval_samples_per_second": 11.774, |
|
"eval_wer": 0.4487735944281821, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 49.03, |
|
"learning_rate": 5.635359116022099e-05, |
|
"loss": 0.0351, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 49.03, |
|
"eval_loss": 0.6160412430763245, |
|
"eval_runtime": 132.6569, |
|
"eval_samples_per_second": 11.473, |
|
"eval_wer": 0.4417078833148279, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 50.32, |
|
"learning_rate": 4.9723756906077343e-05, |
|
"loss": 0.0373, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 50.32, |
|
"eval_loss": 0.5868551731109619, |
|
"eval_runtime": 116.8754, |
|
"eval_samples_per_second": 13.022, |
|
"eval_wer": 0.4385787826789139, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 51.61, |
|
"learning_rate": 4.30939226519337e-05, |
|
"loss": 0.0346, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 51.61, |
|
"eval_loss": 0.6198846101760864, |
|
"eval_runtime": 124.1832, |
|
"eval_samples_per_second": 12.256, |
|
"eval_wer": 0.4357524982335722, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 52.9, |
|
"learning_rate": 3.646408839779005e-05, |
|
"loss": 0.0328, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 52.9, |
|
"eval_loss": 0.6003885269165039, |
|
"eval_runtime": 124.3773, |
|
"eval_samples_per_second": 12.237, |
|
"eval_wer": 0.43999192490158473, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 54.19, |
|
"learning_rate": 2.9834254143646404e-05, |
|
"loss": 0.0315, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 54.19, |
|
"eval_loss": 0.6082204580307007, |
|
"eval_runtime": 124.1939, |
|
"eval_samples_per_second": 12.255, |
|
"eval_wer": 0.4374684566468154, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 55.48, |
|
"learning_rate": 2.320441988950276e-05, |
|
"loss": 0.0312, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 55.48, |
|
"eval_loss": 0.6157354116439819, |
|
"eval_runtime": 124.1732, |
|
"eval_samples_per_second": 12.257, |
|
"eval_wer": 0.4346421722014737, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 56.77, |
|
"learning_rate": 1.6574585635359113e-05, |
|
"loss": 0.031, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 56.77, |
|
"eval_loss": 0.6112752556800842, |
|
"eval_runtime": 125.7635, |
|
"eval_samples_per_second": 12.102, |
|
"eval_wer": 0.4303018068032704, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 58.06, |
|
"learning_rate": 9.944751381215468e-06, |
|
"loss": 0.0296, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 58.06, |
|
"eval_loss": 0.6031031608581543, |
|
"eval_runtime": 125.68, |
|
"eval_samples_per_second": 12.11, |
|
"eval_wer": 0.4288886645805996, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 59.35, |
|
"learning_rate": 3.3149171270718227e-06, |
|
"loss": 0.0287, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 59.35, |
|
"eval_loss": 0.6061920523643494, |
|
"eval_runtime": 124.5102, |
|
"eval_samples_per_second": 12.224, |
|
"eval_wer": 0.428686787120218, |
|
"step": 18400 |
|
} |
|
], |
|
"max_steps": 18600, |
|
"num_train_epochs": 60, |
|
"total_flos": 2.8159583015375258e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|