{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 99.99521531100478,
  "global_step": 5200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.92,
      "learning_rate": 3.2333333333333334e-06,
      "loss": 13.5304,
      "step": 100
    },
    {
      "epoch": 3.84,
      "learning_rate": 6.566666666666667e-06,
      "loss": 3.5412,
      "step": 200
    },
    {
      "epoch": 5.77,
      "learning_rate": 9.9e-06,
      "loss": 3.1425,
      "step": 300
    },
    {
      "epoch": 7.69,
      "learning_rate": 9.80204081632653e-06,
      "loss": 2.0204,
      "step": 400
    },
    {
      "epoch": 9.61,
      "learning_rate": 9.59795918367347e-06,
      "loss": 1.5794,
      "step": 500
    },
    {
      "epoch": 9.61,
      "eval_loss": 0.9859886169433594,
      "eval_runtime": 65.6063,
      "eval_samples_per_second": 16.95,
      "eval_steps_per_second": 4.237,
      "eval_wer": 0.9582062780269058,
      "step": 500
    },
    {
      "epoch": 11.54,
      "learning_rate": 9.39387755102041e-06,
      "loss": 1.4015,
      "step": 600
    },
    {
      "epoch": 13.46,
      "learning_rate": 9.189795918367347e-06,
      "loss": 1.2803,
      "step": 700
    },
    {
      "epoch": 15.38,
      "learning_rate": 8.985714285714287e-06,
      "loss": 1.1969,
      "step": 800
    },
    {
      "epoch": 17.31,
      "learning_rate": 8.781632653061225e-06,
      "loss": 1.1458,
      "step": 900
    },
    {
      "epoch": 19.23,
      "learning_rate": 8.577551020408163e-06,
      "loss": 1.0985,
      "step": 1000
    },
    {
      "epoch": 19.23,
      "eval_loss": 0.5948446989059448,
      "eval_runtime": 63.7089,
      "eval_samples_per_second": 17.454,
      "eval_steps_per_second": 4.364,
      "eval_wer": 0.7533632286995515,
      "step": 1000
    },
    {
      "epoch": 21.15,
      "learning_rate": 8.373469387755104e-06,
      "loss": 1.0639,
      "step": 1100
    },
    {
      "epoch": 23.08,
      "learning_rate": 8.169387755102042e-06,
      "loss": 1.0214,
      "step": 1200
    },
    {
      "epoch": 25.0,
      "learning_rate": 7.96530612244898e-06,
      "loss": 1.0047,
      "step": 1300
    },
    {
      "epoch": 26.92,
      "learning_rate": 7.76122448979592e-06,
      "loss": 0.971,
      "step": 1400
    },
    {
      "epoch": 28.84,
      "learning_rate": 7.557142857142857e-06,
      "loss": 0.9551,
      "step": 1500
    },
    {
      "epoch": 28.84,
      "eval_loss": 0.5892378091812134,
      "eval_runtime": 64.7699,
      "eval_samples_per_second": 17.168,
      "eval_steps_per_second": 4.292,
      "eval_wer": 0.7391928251121076,
      "step": 1500
    },
    {
      "epoch": 30.77,
      "learning_rate": 7.353061224489797e-06,
      "loss": 0.9387,
      "step": 1600
    },
    {
      "epoch": 32.69,
      "learning_rate": 7.148979591836735e-06,
      "loss": 0.9272,
      "step": 1700
    },
    {
      "epoch": 34.61,
      "learning_rate": 6.944897959183674e-06,
      "loss": 0.9192,
      "step": 1800
    },
    {
      "epoch": 36.54,
      "learning_rate": 6.740816326530613e-06,
      "loss": 0.892,
      "step": 1900
    },
    {
      "epoch": 38.46,
      "learning_rate": 6.5367346938775515e-06,
      "loss": 0.8699,
      "step": 2000
    },
    {
      "epoch": 38.46,
      "eval_loss": 0.5926958322525024,
      "eval_runtime": 64.9144,
      "eval_samples_per_second": 17.13,
      "eval_steps_per_second": 4.283,
      "eval_wer": 0.72,
      "step": 2000
    },
    {
      "epoch": 40.38,
      "learning_rate": 6.332653061224491e-06,
      "loss": 0.8779,
      "step": 2100
    },
    {
      "epoch": 42.31,
      "learning_rate": 6.128571428571429e-06,
      "loss": 0.8511,
      "step": 2200
    },
    {
      "epoch": 44.23,
      "learning_rate": 5.924489795918368e-06,
      "loss": 0.8532,
      "step": 2300
    },
    {
      "epoch": 46.15,
      "learning_rate": 5.720408163265306e-06,
      "loss": 0.8442,
      "step": 2400
    },
    {
      "epoch": 48.08,
      "learning_rate": 5.516326530612245e-06,
      "loss": 0.8372,
      "step": 2500
    },
    {
      "epoch": 48.08,
      "eval_loss": 0.5938696265220642,
      "eval_runtime": 64.5641,
      "eval_samples_per_second": 17.223,
      "eval_steps_per_second": 4.306,
      "eval_wer": 0.73847533632287,
      "step": 2500
    },
    {
      "epoch": 50.0,
      "learning_rate": 5.312244897959185e-06,
      "loss": 0.8112,
      "step": 2600
    },
    {
      "epoch": 51.92,
      "learning_rate": 5.108163265306123e-06,
      "loss": 0.8165,
      "step": 2700
    },
    {
      "epoch": 53.84,
      "learning_rate": 4.904081632653061e-06,
      "loss": 0.8045,
      "step": 2800
    },
    {
      "epoch": 55.77,
      "learning_rate": 4.7e-06,
      "loss": 0.7974,
      "step": 2900
    },
    {
      "epoch": 57.69,
      "learning_rate": 4.4959183673469394e-06,
      "loss": 0.7794,
      "step": 3000
    },
    {
      "epoch": 57.69,
      "eval_loss": 0.5800846815109253,
      "eval_runtime": 63.4747,
      "eval_samples_per_second": 17.519,
      "eval_steps_per_second": 4.38,
      "eval_wer": 0.7388340807174888,
      "step": 3000
    },
    {
      "epoch": 59.61,
      "learning_rate": 4.291836734693878e-06,
      "loss": 0.7752,
      "step": 3100
    },
    {
      "epoch": 61.54,
      "learning_rate": 4.087755102040817e-06,
      "loss": 0.7765,
      "step": 3200
    },
    {
      "epoch": 63.46,
      "learning_rate": 3.8836734693877556e-06,
      "loss": 0.7584,
      "step": 3300
    },
    {
      "epoch": 65.38,
      "learning_rate": 3.6795918367346943e-06,
      "loss": 0.7589,
      "step": 3400
    },
    {
      "epoch": 67.31,
      "learning_rate": 3.475510204081633e-06,
      "loss": 0.7565,
      "step": 3500
    },
    {
      "epoch": 67.31,
      "eval_loss": 0.582656979560852,
      "eval_runtime": 71.1025,
      "eval_samples_per_second": 15.639,
      "eval_steps_per_second": 3.91,
      "eval_wer": 0.7183856502242153,
      "step": 3500
    },
    {
      "epoch": 69.23,
      "learning_rate": 3.2714285714285717e-06,
      "loss": 0.7595,
      "step": 3600
    },
    {
      "epoch": 71.15,
      "learning_rate": 3.0673469387755104e-06,
      "loss": 0.747,
      "step": 3700
    },
    {
      "epoch": 73.08,
      "learning_rate": 2.863265306122449e-06,
      "loss": 0.7373,
      "step": 3800
    },
    {
      "epoch": 75.0,
      "learning_rate": 2.6591836734693882e-06,
      "loss": 0.7268,
      "step": 3900
    },
    {
      "epoch": 76.92,
      "learning_rate": 2.455102040816327e-06,
      "loss": 0.7303,
      "step": 4000
    },
    {
      "epoch": 76.92,
      "eval_loss": 0.580017626285553,
      "eval_runtime": 63.495,
      "eval_samples_per_second": 17.513,
      "eval_steps_per_second": 4.378,
      "eval_wer": 0.7296860986547086,
      "step": 4000
    },
    {
      "epoch": 78.84,
      "learning_rate": 2.2510204081632656e-06,
      "loss": 0.7239,
      "step": 4100
    },
    {
      "epoch": 80.77,
      "learning_rate": 2.0469387755102044e-06,
      "loss": 0.7201,
      "step": 4200
    },
    {
      "epoch": 82.69,
      "learning_rate": 1.8428571428571428e-06,
      "loss": 0.7185,
      "step": 4300
    },
    {
      "epoch": 84.61,
      "learning_rate": 1.6387755102040818e-06,
      "loss": 0.7149,
      "step": 4400
    },
    {
      "epoch": 86.54,
      "learning_rate": 1.4346938775510205e-06,
      "loss": 0.7128,
      "step": 4500
    },
    {
      "epoch": 86.54,
      "eval_loss": 0.5977014899253845,
      "eval_runtime": 64.1516,
      "eval_samples_per_second": 17.334,
      "eval_steps_per_second": 4.333,
      "eval_wer": 0.7149775784753363,
      "step": 4500
    },
    {
      "epoch": 88.46,
      "learning_rate": 1.2306122448979594e-06,
      "loss": 0.705,
      "step": 4600
    },
    {
      "epoch": 90.38,
      "learning_rate": 1.026530612244898e-06,
      "loss": 0.7129,
      "step": 4700
    },
    {
      "epoch": 92.31,
      "learning_rate": 8.224489795918368e-07,
      "loss": 0.7076,
      "step": 4800
    },
    {
      "epoch": 94.23,
      "learning_rate": 6.183673469387755e-07,
      "loss": 0.7126,
      "step": 4900
    },
    {
      "epoch": 96.15,
      "learning_rate": 4.142857142857143e-07,
      "loss": 0.6972,
      "step": 5000
    },
    {
      "epoch": 96.15,
      "eval_loss": 0.5900700092315674,
      "eval_runtime": 63.2743,
      "eval_samples_per_second": 17.574,
      "eval_steps_per_second": 4.394,
      "eval_wer": 0.7176681614349776,
      "step": 5000
    },
    {
      "epoch": 98.08,
      "learning_rate": 2.1020408163265306e-07,
      "loss": 0.7001,
      "step": 5100
    },
    {
      "epoch": 100.0,
      "learning_rate": 6.1224489795918365e-09,
      "loss": 0.6965,
      "step": 5200
    },
    {
      "epoch": 100.0,
      "step": 5200,
      "total_flos": 1.4653197993726655e+20,
      "train_loss": 1.2273276989276594,
      "train_runtime": 32753.7935,
      "train_samples_per_second": 10.185,
      "train_steps_per_second": 0.159
    }
  ],
  "max_steps": 5200,
  "num_train_epochs": 100,
  "total_flos": 1.4653197993726655e+20,
  "trial_name": null,
  "trial_params": null
}