{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 31.71508379888268,
  "global_step": 4250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.74,
      "learning_rate": 5e-05,
      "loss": 0.0641,
      "step": 100
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.999626837823718e-05,
      "loss": 0.041,
      "step": 200
    },
    {
      "epoch": 1.86,
      "eval_loss": 0.1974974274635315,
      "eval_runtime": 282.4584,
      "eval_samples_per_second": 13.57,
      "eval_steps_per_second": 1.699,
      "eval_wer": 0.18217074305806918,
      "step": 250
    },
    {
      "epoch": 2.24,
      "learning_rate": 4.999253675647437e-05,
      "loss": 0.0381,
      "step": 300
    },
    {
      "epoch": 2.98,
      "learning_rate": 4.998884245092918e-05,
      "loss": 0.0343,
      "step": 400
    },
    {
      "epoch": 3.73,
      "learning_rate": 4.998511082916636e-05,
      "loss": 0.0325,
      "step": 500
    },
    {
      "epoch": 3.73,
      "eval_loss": 0.2165367156267166,
      "eval_runtime": 199.6867,
      "eval_samples_per_second": 19.195,
      "eval_steps_per_second": 2.404,
      "eval_wer": 0.18279125084027095,
      "step": 500
    },
    {
      "epoch": 4.48,
      "learning_rate": 4.998137920740354e-05,
      "loss": 0.0324,
      "step": 600
    },
    {
      "epoch": 5.22,
      "learning_rate": 4.9977647585640724e-05,
      "loss": 0.0303,
      "step": 700
    },
    {
      "epoch": 5.6,
      "eval_loss": 0.23278702795505524,
      "eval_runtime": 211.6975,
      "eval_samples_per_second": 18.106,
      "eval_steps_per_second": 2.267,
      "eval_wer": 0.1827395418584208,
      "step": 750
    },
    {
      "epoch": 5.97,
      "learning_rate": 4.9973915963877904e-05,
      "loss": 0.031,
      "step": 800
    },
    {
      "epoch": 6.72,
      "learning_rate": 4.9970184342115084e-05,
      "loss": 0.0287,
      "step": 900
    },
    {
      "epoch": 7.46,
      "learning_rate": 4.996645272035227e-05,
      "loss": 0.0281,
      "step": 1000
    },
    {
      "epoch": 7.46,
      "eval_loss": 0.23425738513469696,
      "eval_runtime": 209.1881,
      "eval_samples_per_second": 18.323,
      "eval_steps_per_second": 2.295,
      "eval_wer": 0.18170536222141787,
      "step": 1000
    },
    {
      "epoch": 8.21,
      "learning_rate": 4.996272109858945e-05,
      "loss": 0.0266,
      "step": 1100
    },
    {
      "epoch": 8.95,
      "learning_rate": 4.995898947682663e-05,
      "loss": 0.0263,
      "step": 1200
    },
    {
      "epoch": 9.33,
      "eval_loss": 0.23851124942302704,
      "eval_runtime": 208.1334,
      "eval_samples_per_second": 18.416,
      "eval_steps_per_second": 2.306,
      "eval_wer": 0.17855111432855886,
      "step": 1250
    },
    {
      "epoch": 9.7,
      "learning_rate": 4.995525785506381e-05,
      "loss": 0.0265,
      "step": 1300
    },
    {
      "epoch": 10.45,
      "learning_rate": 4.995152623330099e-05,
      "loss": 0.0263,
      "step": 1400
    },
    {
      "epoch": 11.19,
      "learning_rate": 4.994779461153818e-05,
      "loss": 0.0262,
      "step": 1500
    },
    {
      "epoch": 11.19,
      "eval_loss": 0.2488125115633011,
      "eval_runtime": 216.4749,
      "eval_samples_per_second": 17.706,
      "eval_steps_per_second": 2.217,
      "eval_wer": 0.17699984487305445,
      "step": 1500
    },
    {
      "epoch": 11.94,
      "learning_rate": 4.994406298977536e-05,
      "loss": 0.0237,
      "step": 1600
    },
    {
      "epoch": 12.69,
      "learning_rate": 4.994033136801254e-05,
      "loss": 0.0249,
      "step": 1700
    },
    {
      "epoch": 13.06,
      "eval_loss": 0.261038601398468,
      "eval_runtime": 219.4529,
      "eval_samples_per_second": 17.466,
      "eval_steps_per_second": 2.187,
      "eval_wer": 0.17513832152644915,
      "step": 1750
    },
    {
      "epoch": 13.43,
      "learning_rate": 4.9936599746249724e-05,
      "loss": 0.0235,
      "step": 1800
    },
    {
      "epoch": 14.18,
      "learning_rate": 4.9932868124486904e-05,
      "loss": 0.0246,
      "step": 1900
    },
    {
      "epoch": 14.92,
      "learning_rate": 4.992913650272409e-05,
      "loss": 0.0227,
      "step": 2000
    },
    {
      "epoch": 14.92,
      "eval_loss": 0.25547119975090027,
      "eval_runtime": 215.7055,
      "eval_samples_per_second": 17.77,
      "eval_steps_per_second": 2.225,
      "eval_wer": 0.17472464967164797,
      "step": 2000
    },
    {
      "epoch": 15.67,
      "learning_rate": 4.992540488096127e-05,
      "loss": 0.0215,
      "step": 2100
    },
    {
      "epoch": 16.42,
      "learning_rate": 4.992167325919845e-05,
      "loss": 0.0218,
      "step": 2200
    },
    {
      "epoch": 16.79,
      "eval_loss": 0.25752121210098267,
      "eval_runtime": 207.5079,
      "eval_samples_per_second": 18.472,
      "eval_steps_per_second": 2.313,
      "eval_wer": 0.17425926883499665,
      "step": 2250
    },
    {
      "epoch": 17.16,
      "learning_rate": 4.991794163743563e-05,
      "loss": 0.0213,
      "step": 2300
    },
    {
      "epoch": 17.91,
      "learning_rate": 4.991421001567281e-05,
      "loss": 0.0212,
      "step": 2400
    },
    {
      "epoch": 18.66,
      "learning_rate": 4.991047839390999e-05,
      "loss": 0.0211,
      "step": 2500
    },
    {
      "epoch": 18.66,
      "eval_loss": 0.2587244212627411,
      "eval_runtime": 211.1091,
      "eval_samples_per_second": 18.156,
      "eval_steps_per_second": 2.274,
      "eval_wer": 0.1761725011634521,
      "step": 2500
    },
    {
      "epoch": 19.4,
      "learning_rate": 4.990674677214718e-05,
      "loss": 0.0201,
      "step": 2600
    },
    {
      "epoch": 20.15,
      "learning_rate": 4.990301515038436e-05,
      "loss": 0.0222,
      "step": 2700
    },
    {
      "epoch": 20.52,
      "eval_loss": 0.26894259452819824,
      "eval_runtime": 212.0938,
      "eval_samples_per_second": 18.072,
      "eval_steps_per_second": 2.263,
      "eval_wer": 0.17451781374424738,
      "step": 2750
    },
    {
      "epoch": 20.89,
      "learning_rate": 4.9899283528621545e-05,
      "loss": 0.0207,
      "step": 2800
    },
    {
      "epoch": 21.64,
      "learning_rate": 4.9895551906858725e-05,
      "loss": 0.0194,
      "step": 2900
    },
    {
      "epoch": 22.39,
      "learning_rate": 4.9891820285095905e-05,
      "loss": 0.021,
      "step": 3000
    },
    {
      "epoch": 22.39,
      "eval_loss": 0.26362183690071106,
      "eval_runtime": 204.8903,
      "eval_samples_per_second": 18.708,
      "eval_steps_per_second": 2.343,
      "eval_wer": 0.1751900305082993,
      "step": 3000
    },
    {
      "epoch": 23.13,
      "learning_rate": 4.9888088663333085e-05,
      "loss": 0.0216,
      "step": 3100
    },
    {
      "epoch": 23.88,
      "learning_rate": 4.9884357041570265e-05,
      "loss": 0.02,
      "step": 3200
    },
    {
      "epoch": 24.25,
      "eval_loss": 0.2709738612174988,
      "eval_runtime": 211.3483,
      "eval_samples_per_second": 18.136,
      "eval_steps_per_second": 2.271,
      "eval_wer": 0.17513832152644915,
      "step": 3250
    },
    {
      "epoch": 24.63,
      "learning_rate": 4.988062541980745e-05,
      "loss": 0.02,
      "step": 3300
    },
    {
      "epoch": 25.37,
      "learning_rate": 4.987689379804463e-05,
      "loss": 0.0206,
      "step": 3400
    },
    {
      "epoch": 26.12,
      "learning_rate": 4.987316217628181e-05,
      "loss": 0.0195,
      "step": 3500
    },
    {
      "epoch": 26.12,
      "eval_loss": 0.26540952920913696,
      "eval_runtime": 209.7499,
      "eval_samples_per_second": 18.274,
      "eval_steps_per_second": 2.288,
      "eval_wer": 0.17570712032680078,
      "step": 3500
    },
    {
      "epoch": 26.86,
      "learning_rate": 4.9869430554519e-05,
      "loss": 0.0193,
      "step": 3600
    },
    {
      "epoch": 27.61,
      "learning_rate": 4.986569893275618e-05,
      "loss": 0.0197,
      "step": 3700
    },
    {
      "epoch": 27.98,
      "eval_loss": 0.26861485838890076,
      "eval_runtime": 224.5,
      "eval_samples_per_second": 17.073,
      "eval_steps_per_second": 2.138,
      "eval_wer": 0.17482806763534825,
      "step": 3750
    },
    {
      "epoch": 28.36,
      "learning_rate": 4.9861967310993365e-05,
      "loss": 0.0198,
      "step": 3800
    },
    {
      "epoch": 29.1,
      "learning_rate": 4.9858235689230545e-05,
      "loss": 0.0199,
      "step": 3900
    },
    {
      "epoch": 29.85,
      "learning_rate": 4.9854504067467725e-05,
      "loss": 0.0187,
      "step": 4000
    },
    {
      "epoch": 29.85,
      "eval_loss": 0.2687038481235504,
      "eval_runtime": 221.4533,
      "eval_samples_per_second": 17.308,
      "eval_steps_per_second": 2.168,
      "eval_wer": 0.1751900305082993,
      "step": 4000
    },
    {
      "epoch": 30.6,
      "learning_rate": 4.9850772445704905e-05,
      "loss": 0.019,
      "step": 4100
    },
    {
      "epoch": 31.34,
      "learning_rate": 4.9847040823942085e-05,
      "loss": 0.0188,
      "step": 4200
    },
    {
      "epoch": 31.72,
      "eval_loss": 0.2625608444213867,
      "eval_runtime": 222.8747,
      "eval_samples_per_second": 17.198,
      "eval_steps_per_second": 2.154,
      "eval_wer": 0.1721392005791406,
      "step": 4250
    }
  ],
  "max_steps": 1340000,
  "num_train_epochs": 10000,
  "total_flos": 9.643398954166528e+19,
  "trial_name": null,
  "trial_params": null
}