|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 500.0, |
|
"global_step": 11000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 3.675e-06, |
|
"loss": 15.8714, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 7.425e-06, |
|
"loss": 8.905, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 1.1174999999999999e-05, |
|
"loss": 5.8648, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 1.4925e-05, |
|
"loss": 4.9504, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"eval_loss": 4.673031330108643, |
|
"eval_runtime": 13.8453, |
|
"eval_samples_per_second": 22.101, |
|
"eval_steps_per_second": 1.445, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"learning_rate": 1.8675e-05, |
|
"loss": 4.2429, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 2.2424999999999996e-05, |
|
"loss": 3.72, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 31.82, |
|
"learning_rate": 2.6174999999999996e-05, |
|
"loss": 3.4596, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"learning_rate": 2.9925e-05, |
|
"loss": 3.3766, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"eval_loss": 3.3464324474334717, |
|
"eval_runtime": 13.5196, |
|
"eval_samples_per_second": 22.634, |
|
"eval_steps_per_second": 1.479, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 40.91, |
|
"learning_rate": 3.3675e-05, |
|
"loss": 3.3035, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"learning_rate": 3.7424999999999995e-05, |
|
"loss": 3.2559, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 4.1175e-05, |
|
"loss": 3.2202, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 54.55, |
|
"learning_rate": 4.4924999999999994e-05, |
|
"loss": 3.1128, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 54.55, |
|
"eval_loss": 3.0176751613616943, |
|
"eval_runtime": 13.6849, |
|
"eval_samples_per_second": 22.36, |
|
"eval_steps_per_second": 1.461, |
|
"eval_wer": 0.9979943842759728, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 59.09, |
|
"learning_rate": 4.8675e-05, |
|
"loss": 2.9121, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 63.64, |
|
"learning_rate": 5.2424999999999994e-05, |
|
"loss": 2.4741, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 68.18, |
|
"learning_rate": 5.6175e-05, |
|
"loss": 2.0461, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"learning_rate": 5.9925e-05, |
|
"loss": 1.7966, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"eval_loss": 0.8733049631118774, |
|
"eval_runtime": 13.6227, |
|
"eval_samples_per_second": 22.463, |
|
"eval_steps_per_second": 1.468, |
|
"eval_wer": 0.8038507821901324, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 77.27, |
|
"learning_rate": 6.367499999999999e-05, |
|
"loss": 1.6388, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 81.82, |
|
"learning_rate": 6.7425e-05, |
|
"loss": 1.5466, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 86.36, |
|
"learning_rate": 7.1175e-05, |
|
"loss": 1.461, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"learning_rate": 7.48875e-05, |
|
"loss": 1.4085, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"eval_loss": 0.5555232763290405, |
|
"eval_runtime": 13.4681, |
|
"eval_samples_per_second": 22.72, |
|
"eval_steps_per_second": 1.485, |
|
"eval_wer": 0.645808263136783, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 95.45, |
|
"learning_rate": 7.419166666666666e-05, |
|
"loss": 1.3233, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 7.335833333333333e-05, |
|
"loss": 1.2821, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 104.55, |
|
"learning_rate": 7.252499999999999e-05, |
|
"loss": 1.2194, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 109.09, |
|
"learning_rate": 7.169166666666666e-05, |
|
"loss": 1.1731, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 109.09, |
|
"eval_loss": 0.4929651618003845, |
|
"eval_runtime": 13.4716, |
|
"eval_samples_per_second": 22.715, |
|
"eval_steps_per_second": 1.485, |
|
"eval_wer": 0.6438026474127557, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 113.64, |
|
"learning_rate": 7.085833333333333e-05, |
|
"loss": 1.135, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 118.18, |
|
"learning_rate": 7.0025e-05, |
|
"loss": 1.1009, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 122.73, |
|
"learning_rate": 6.919166666666666e-05, |
|
"loss": 1.0712, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 127.27, |
|
"learning_rate": 6.835833333333332e-05, |
|
"loss": 1.0271, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 127.27, |
|
"eval_loss": 0.47801950573921204, |
|
"eval_runtime": 13.4955, |
|
"eval_samples_per_second": 22.674, |
|
"eval_steps_per_second": 1.482, |
|
"eval_wer": 0.6093060569594866, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 131.82, |
|
"learning_rate": 6.753333333333333e-05, |
|
"loss": 1.0016, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 136.36, |
|
"learning_rate": 6.67e-05, |
|
"loss": 0.9638, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 140.91, |
|
"learning_rate": 6.586666666666666e-05, |
|
"loss": 0.9327, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 145.45, |
|
"learning_rate": 6.503333333333333e-05, |
|
"loss": 0.9045, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 145.45, |
|
"eval_loss": 0.46472442150115967, |
|
"eval_runtime": 13.3444, |
|
"eval_samples_per_second": 22.931, |
|
"eval_steps_per_second": 1.499, |
|
"eval_wer": 0.6578419574809466, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 6.419999999999999e-05, |
|
"loss": 0.8865, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 154.55, |
|
"learning_rate": 6.336666666666666e-05, |
|
"loss": 0.8703, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 159.09, |
|
"learning_rate": 6.253333333333333e-05, |
|
"loss": 0.8379, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 163.64, |
|
"learning_rate": 6.17e-05, |
|
"loss": 0.807, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 163.64, |
|
"eval_loss": 0.4505322575569153, |
|
"eval_runtime": 13.3663, |
|
"eval_samples_per_second": 22.893, |
|
"eval_steps_per_second": 1.496, |
|
"eval_wer": 0.5924588848776574, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 168.18, |
|
"learning_rate": 6.0866666666666664e-05, |
|
"loss": 0.805, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 172.73, |
|
"learning_rate": 6.003333333333333e-05, |
|
"loss": 0.7889, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 177.27, |
|
"learning_rate": 5.9199999999999996e-05, |
|
"loss": 0.764, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 181.82, |
|
"learning_rate": 5.8366666666666665e-05, |
|
"loss": 0.741, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 181.82, |
|
"eval_loss": 0.4745561182498932, |
|
"eval_runtime": 13.2681, |
|
"eval_samples_per_second": 23.063, |
|
"eval_steps_per_second": 1.507, |
|
"eval_wer": 0.6024869634977938, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 186.36, |
|
"learning_rate": 5.753333333333333e-05, |
|
"loss": 0.7192, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 190.91, |
|
"learning_rate": 5.6699999999999996e-05, |
|
"loss": 0.6977, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 195.45, |
|
"learning_rate": 5.5866666666666665e-05, |
|
"loss": 0.6882, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 5.503333333333333e-05, |
|
"loss": 0.6706, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_loss": 0.5003653168678284, |
|
"eval_runtime": 13.3324, |
|
"eval_samples_per_second": 22.952, |
|
"eval_steps_per_second": 1.5, |
|
"eval_wer": 0.5844364219815483, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 204.55, |
|
"learning_rate": 5.4199999999999996e-05, |
|
"loss": 0.6627, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 209.09, |
|
"learning_rate": 5.3366666666666665e-05, |
|
"loss": 0.6453, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 213.64, |
|
"learning_rate": 5.253333333333333e-05, |
|
"loss": 0.6311, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 218.18, |
|
"learning_rate": 5.1699999999999996e-05, |
|
"loss": 0.6186, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 218.18, |
|
"eval_loss": 0.4983522891998291, |
|
"eval_runtime": 13.4916, |
|
"eval_samples_per_second": 22.681, |
|
"eval_steps_per_second": 1.482, |
|
"eval_wer": 0.5996791014841556, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 222.73, |
|
"learning_rate": 5.086666666666666e-05, |
|
"loss": 0.5976, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 227.27, |
|
"learning_rate": 5.003333333333333e-05, |
|
"loss": 0.5832, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 231.82, |
|
"learning_rate": 4.9199999999999997e-05, |
|
"loss": 0.5663, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 236.36, |
|
"learning_rate": 4.836666666666666e-05, |
|
"loss": 0.5508, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 236.36, |
|
"eval_loss": 0.5298363566398621, |
|
"eval_runtime": 13.4008, |
|
"eval_samples_per_second": 22.834, |
|
"eval_steps_per_second": 1.492, |
|
"eval_wer": 0.5635780184516647, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 240.91, |
|
"learning_rate": 4.753333333333333e-05, |
|
"loss": 0.554, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 245.45, |
|
"learning_rate": 4.67e-05, |
|
"loss": 0.543, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 4.586666666666666e-05, |
|
"loss": 0.5221, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 254.55, |
|
"learning_rate": 4.503333333333333e-05, |
|
"loss": 0.5123, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 254.55, |
|
"eval_loss": 0.5410219430923462, |
|
"eval_runtime": 13.3891, |
|
"eval_samples_per_second": 22.854, |
|
"eval_steps_per_second": 1.494, |
|
"eval_wer": 0.51103088648215, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 259.09, |
|
"learning_rate": 4.42e-05, |
|
"loss": 0.5023, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 263.64, |
|
"learning_rate": 4.336666666666666e-05, |
|
"loss": 0.4854, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 268.18, |
|
"learning_rate": 4.253333333333333e-05, |
|
"loss": 0.4795, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 272.73, |
|
"learning_rate": 4.17e-05, |
|
"loss": 0.4623, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 272.73, |
|
"eval_loss": 0.5591160655021667, |
|
"eval_runtime": 13.3234, |
|
"eval_samples_per_second": 22.967, |
|
"eval_steps_per_second": 1.501, |
|
"eval_wer": 0.538307260328921, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 277.27, |
|
"learning_rate": 4.086666666666666e-05, |
|
"loss": 0.4593, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 281.82, |
|
"learning_rate": 4.003333333333333e-05, |
|
"loss": 0.4435, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 286.36, |
|
"learning_rate": 3.919999999999999e-05, |
|
"loss": 0.4432, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 290.91, |
|
"learning_rate": 3.836666666666666e-05, |
|
"loss": 0.4281, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 290.91, |
|
"eval_loss": 0.5775493383407593, |
|
"eval_runtime": 13.3759, |
|
"eval_samples_per_second": 22.877, |
|
"eval_steps_per_second": 1.495, |
|
"eval_wer": 0.5599679101484155, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 295.45, |
|
"learning_rate": 3.753333333333333e-05, |
|
"loss": 0.4264, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"learning_rate": 3.67e-05, |
|
"loss": 0.4187, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 304.55, |
|
"learning_rate": 3.586666666666666e-05, |
|
"loss": 0.3965, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 309.09, |
|
"learning_rate": 3.503333333333333e-05, |
|
"loss": 0.4045, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 309.09, |
|
"eval_loss": 0.5923808217048645, |
|
"eval_runtime": 13.6647, |
|
"eval_samples_per_second": 22.394, |
|
"eval_steps_per_second": 1.464, |
|
"eval_wer": 0.5579622944243883, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 313.64, |
|
"learning_rate": 3.42e-05, |
|
"loss": 0.3857, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 318.18, |
|
"learning_rate": 3.336666666666667e-05, |
|
"loss": 0.373, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 322.73, |
|
"learning_rate": 3.253333333333333e-05, |
|
"loss": 0.3752, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 327.27, |
|
"learning_rate": 3.17e-05, |
|
"loss": 0.3651, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 327.27, |
|
"eval_loss": 0.567070484161377, |
|
"eval_runtime": 13.4998, |
|
"eval_samples_per_second": 22.667, |
|
"eval_steps_per_second": 1.482, |
|
"eval_wer": 0.5683914961893302, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 331.82, |
|
"learning_rate": 3.086666666666667e-05, |
|
"loss": 0.3567, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 336.36, |
|
"learning_rate": 3.003333333333333e-05, |
|
"loss": 0.3551, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 340.91, |
|
"learning_rate": 2.9199999999999995e-05, |
|
"loss": 0.351, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 345.45, |
|
"learning_rate": 2.8366666666666664e-05, |
|
"loss": 0.343, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 345.45, |
|
"eval_loss": 0.6082874536514282, |
|
"eval_runtime": 13.5565, |
|
"eval_samples_per_second": 22.572, |
|
"eval_steps_per_second": 1.475, |
|
"eval_wer": 0.5944645006016848, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"learning_rate": 2.7541666666666664e-05, |
|
"loss": 0.3379, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 354.55, |
|
"learning_rate": 2.6716666666666664e-05, |
|
"loss": 0.3257, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 359.09, |
|
"learning_rate": 2.588333333333333e-05, |
|
"loss": 0.3285, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 363.64, |
|
"learning_rate": 2.505e-05, |
|
"loss": 0.3085, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 363.64, |
|
"eval_loss": 0.6242865324020386, |
|
"eval_runtime": 13.4343, |
|
"eval_samples_per_second": 22.778, |
|
"eval_steps_per_second": 1.489, |
|
"eval_wer": 0.5728038507821901, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 368.18, |
|
"learning_rate": 2.4216666666666665e-05, |
|
"loss": 0.3127, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 372.73, |
|
"learning_rate": 2.338333333333333e-05, |
|
"loss": 0.3149, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 377.27, |
|
"learning_rate": 2.255e-05, |
|
"loss": 0.307, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 381.82, |
|
"learning_rate": 2.1716666666666665e-05, |
|
"loss": 0.2941, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 381.82, |
|
"eval_loss": 0.6244971752166748, |
|
"eval_runtime": 13.5184, |
|
"eval_samples_per_second": 22.636, |
|
"eval_steps_per_second": 1.479, |
|
"eval_wer": 0.5579622944243883, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 386.36, |
|
"learning_rate": 2.088333333333333e-05, |
|
"loss": 0.295, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 390.91, |
|
"learning_rate": 2.0049999999999996e-05, |
|
"loss": 0.2842, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 395.45, |
|
"learning_rate": 1.9216666666666665e-05, |
|
"loss": 0.2817, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"learning_rate": 1.838333333333333e-05, |
|
"loss": 0.2735, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_loss": 0.6457676291465759, |
|
"eval_runtime": 13.5206, |
|
"eval_samples_per_second": 22.632, |
|
"eval_steps_per_second": 1.479, |
|
"eval_wer": 0.5804251905334937, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 404.55, |
|
"learning_rate": 1.755e-05, |
|
"loss": 0.2786, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 409.09, |
|
"learning_rate": 1.6716666666666665e-05, |
|
"loss": 0.2708, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 413.64, |
|
"learning_rate": 1.5883333333333334e-05, |
|
"loss": 0.2661, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 418.18, |
|
"learning_rate": 1.5049999999999998e-05, |
|
"loss": 0.262, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 418.18, |
|
"eval_loss": 0.656562328338623, |
|
"eval_runtime": 13.3117, |
|
"eval_samples_per_second": 22.987, |
|
"eval_steps_per_second": 1.502, |
|
"eval_wer": 0.5824308062575211, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 422.73, |
|
"learning_rate": 1.4216666666666666e-05, |
|
"loss": 0.2577, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 427.27, |
|
"learning_rate": 1.3383333333333331e-05, |
|
"loss": 0.2576, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 431.82, |
|
"learning_rate": 1.2549999999999998e-05, |
|
"loss": 0.2573, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 436.36, |
|
"learning_rate": 1.1716666666666666e-05, |
|
"loss": 0.2578, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 436.36, |
|
"eval_loss": 0.6557896733283997, |
|
"eval_runtime": 13.599, |
|
"eval_samples_per_second": 22.502, |
|
"eval_steps_per_second": 1.471, |
|
"eval_wer": 0.596470116325712, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 440.91, |
|
"learning_rate": 1.0883333333333331e-05, |
|
"loss": 0.2445, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 445.45, |
|
"learning_rate": 1.005e-05, |
|
"loss": 0.2422, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 450.0, |
|
"learning_rate": 9.216666666666666e-06, |
|
"loss": 0.2481, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 454.55, |
|
"learning_rate": 8.383333333333333e-06, |
|
"loss": 0.2388, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 454.55, |
|
"eval_loss": 0.659763514995575, |
|
"eval_runtime": 13.4458, |
|
"eval_samples_per_second": 22.758, |
|
"eval_steps_per_second": 1.487, |
|
"eval_wer": 0.5992779783393501, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 459.09, |
|
"learning_rate": 7.55e-06, |
|
"loss": 0.2362, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 463.64, |
|
"learning_rate": 6.716666666666666e-06, |
|
"loss": 0.2374, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 468.18, |
|
"learning_rate": 5.883333333333333e-06, |
|
"loss": 0.2352, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 472.73, |
|
"learning_rate": 5.049999999999999e-06, |
|
"loss": 0.2328, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 472.73, |
|
"eval_loss": 0.6699528694152832, |
|
"eval_runtime": 13.3932, |
|
"eval_samples_per_second": 22.847, |
|
"eval_steps_per_second": 1.493, |
|
"eval_wer": 0.6040914560770156, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 477.27, |
|
"learning_rate": 4.216666666666666e-06, |
|
"loss": 0.2286, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 481.82, |
|
"learning_rate": 3.3916666666666662e-06, |
|
"loss": 0.232, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 486.36, |
|
"learning_rate": 2.558333333333333e-06, |
|
"loss": 0.2323, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 490.91, |
|
"learning_rate": 1.7249999999999998e-06, |
|
"loss": 0.2286, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 490.91, |
|
"eval_loss": 0.6684302091598511, |
|
"eval_runtime": 13.5254, |
|
"eval_samples_per_second": 22.624, |
|
"eval_steps_per_second": 1.479, |
|
"eval_wer": 0.5956678700361011, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 495.45, |
|
"learning_rate": 8.916666666666666e-07, |
|
"loss": 0.229, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"learning_rate": 5.8333333333333326e-08, |
|
"loss": 0.2255, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 500.0, |
|
"step": 11000, |
|
"total_flos": 6.534167693112984e+19, |
|
"train_loss": 1.1389288659529253, |
|
"train_runtime": 23734.473, |
|
"train_samples_per_second": 14.704, |
|
"train_steps_per_second": 0.463 |
|
} |
|
], |
|
"max_steps": 11000, |
|
"num_train_epochs": 500, |
|
"total_flos": 6.534167693112984e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|