|
{ |
|
"best_metric": 30.612702366127024, |
|
"best_model_checkpoint": "/cosmos/home/sp-operator/ai/training/models/huggingface/scripts/../breeze-listen-dsw-base-kn/checkpoint-700", |
|
"epoch": 14.006, |
|
"eval_steps": 100, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.0453611334320685e-06, |
|
"loss": 1.8464, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 6.229195710491767e-06, |
|
"loss": 1.642, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.903829450223392e-06, |
|
"loss": 1.2861, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 7.377725845391017e-06, |
|
"loss": 0.7196, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 0.5166015625, |
|
"eval_runtime": 2113.7829, |
|
"eval_samples_per_second": 0.388, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 55.21295143212951, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 7.743343231239583e-06, |
|
"loss": 0.4782, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 8.041073861170494e-06, |
|
"loss": 0.3642, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 8.292222957399574e-06, |
|
"loss": 0.3108, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 8.509413541357755e-06, |
|
"loss": 0.2769, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 0.253173828125, |
|
"eval_runtime": 2105.4925, |
|
"eval_samples_per_second": 0.389, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 36.15940224159402, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 8.700744577655557e-06, |
|
"loss": 0.247, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 8.871723942761204e-06, |
|
"loss": 0.2221, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 9.026267958246849e-06, |
|
"loss": 0.2084, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 9.16726106663399e-06, |
|
"loss": 0.1896, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_loss": 0.2166748046875, |
|
"eval_runtime": 2098.7697, |
|
"eval_samples_per_second": 0.391, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 32.72976338729763, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 9.296889251455016e-06, |
|
"loss": 0.1747, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 9.416848797368692e-06, |
|
"loss": 0.1644, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 9.528482449516371e-06, |
|
"loss": 0.153, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 9.632871309784314e-06, |
|
"loss": 0.1384, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"eval_loss": 0.2037353515625, |
|
"eval_runtime": 2094.8651, |
|
"eval_samples_per_second": 0.391, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 31.835616438356162, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 9.73089868785391e-06, |
|
"loss": 0.1342, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 9.823295589572114e-06, |
|
"loss": 0.1195, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 9.910673836465484e-06, |
|
"loss": 0.109, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 9.993550644973805e-06, |
|
"loss": 0.1099, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.2030029296875, |
|
"eval_runtime": 2095.5793, |
|
"eval_samples_per_second": 0.391, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 31.056039850560396, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 9.56e-06, |
|
"loss": 0.0928, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 9.060000000000001e-06, |
|
"loss": 0.0871, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 8.560000000000001e-06, |
|
"loss": 0.0842, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 8.06e-06, |
|
"loss": 0.0707, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"eval_loss": 0.21533203125, |
|
"eval_runtime": 2096.294, |
|
"eval_samples_per_second": 0.391, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 31.245330012453298, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 7.5600000000000005e-06, |
|
"loss": 0.0659, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 7.06e-06, |
|
"loss": 0.0643, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 6.560000000000001e-06, |
|
"loss": 0.056, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 6.0600000000000004e-06, |
|
"loss": 0.052, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_loss": 0.225830078125, |
|
"eval_runtime": 2096.1998, |
|
"eval_samples_per_second": 0.391, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 30.612702366127024, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 5.560000000000001e-06, |
|
"loss": 0.0483, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 5.060000000000001e-06, |
|
"loss": 0.0424, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 4.56e-06, |
|
"loss": 0.0422, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 4.060000000000001e-06, |
|
"loss": 0.0375, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"eval_loss": 0.2413330078125, |
|
"eval_runtime": 2094.1326, |
|
"eval_samples_per_second": 0.392, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 31.220423412204234, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 3.5600000000000002e-06, |
|
"loss": 0.0329, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 3.0600000000000003e-06, |
|
"loss": 0.0329, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 2.56e-06, |
|
"loss": 0.0303, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 2.06e-06, |
|
"loss": 0.0256, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"eval_loss": 0.250732421875, |
|
"eval_runtime": 2095.7098, |
|
"eval_samples_per_second": 0.391, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 31.06351183063512, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1.56e-06, |
|
"loss": 0.0281, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 1.06e-06, |
|
"loss": 0.0247, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 5.6e-07, |
|
"loss": 0.0229, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 6.000000000000001e-08, |
|
"loss": 0.0245, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"eval_loss": 0.2548828125, |
|
"eval_runtime": 2097.376, |
|
"eval_samples_per_second": 0.391, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 31.10585305105853, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"step": 1000, |
|
"total_flos": 2.067347538305876e+18, |
|
"train_loss": 0.24649163818359374, |
|
"train_runtime": 54303.9792, |
|
"train_samples_per_second": 0.589, |
|
"train_steps_per_second": 0.018 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 100, |
|
"total_flos": 2.067347538305876e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|