whisper-NST2-unfreeze / trainer_state.json
pere's picture
tokenizer
3c46102
{
"best_metric": 7.639680729760548,
"best_model_checkpoint": "../whisper-NST2/checkpoint-8000",
"epoch": 1.3959,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 1.976e-05,
"loss": 0.9006,
"step": 250
},
{
"epoch": 0.05,
"learning_rate": 3.9760000000000006e-05,
"loss": 0.2407,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 3.896e-05,
"loss": 0.2145,
"step": 750
},
{
"epoch": 0.1,
"learning_rate": 3.790736842105263e-05,
"loss": 0.1846,
"step": 1000
},
{
"epoch": 0.1,
"eval_loss": 0.34600791335105896,
"eval_runtime": 44.1083,
"eval_samples_per_second": 2.267,
"eval_steps_per_second": 0.159,
"eval_wer": 14.937286202964653,
"step": 1000
},
{
"epoch": 0.12,
"learning_rate": 3.6854736842105265e-05,
"loss": 0.1663,
"step": 1250
},
{
"epoch": 0.15,
"learning_rate": 3.58021052631579e-05,
"loss": 0.1567,
"step": 1500
},
{
"epoch": 0.17,
"learning_rate": 3.474947368421053e-05,
"loss": 0.1449,
"step": 1750
},
{
"epoch": 0.2,
"learning_rate": 3.369684210526316e-05,
"loss": 0.1325,
"step": 2000
},
{
"epoch": 0.2,
"eval_loss": 0.34131211042404175,
"eval_runtime": 45.4411,
"eval_samples_per_second": 2.201,
"eval_steps_per_second": 0.154,
"eval_wer": 11.402508551881414,
"step": 2000
},
{
"epoch": 0.23,
"learning_rate": 3.2644210526315796e-05,
"loss": 0.1292,
"step": 2250
},
{
"epoch": 0.25,
"learning_rate": 3.159157894736842e-05,
"loss": 0.1225,
"step": 2500
},
{
"epoch": 0.28,
"learning_rate": 3.0538947368421055e-05,
"loss": 0.1168,
"step": 2750
},
{
"epoch": 0.3,
"learning_rate": 2.9486315789473688e-05,
"loss": 0.1135,
"step": 3000
},
{
"epoch": 0.3,
"eval_loss": 0.3427869379520416,
"eval_runtime": 43.6701,
"eval_samples_per_second": 2.29,
"eval_steps_per_second": 0.16,
"eval_wer": 12.656784492588368,
"step": 3000
},
{
"epoch": 0.33,
"learning_rate": 2.843368421052632e-05,
"loss": 0.1074,
"step": 3250
},
{
"epoch": 0.35,
"learning_rate": 2.738105263157895e-05,
"loss": 0.1039,
"step": 3500
},
{
"epoch": 0.38,
"learning_rate": 2.6328421052631582e-05,
"loss": 0.0973,
"step": 3750
},
{
"epoch": 0.4,
"learning_rate": 2.5275789473684215e-05,
"loss": 0.0955,
"step": 4000
},
{
"epoch": 0.4,
"eval_loss": 0.31404200196266174,
"eval_runtime": 43.7248,
"eval_samples_per_second": 2.287,
"eval_steps_per_second": 0.16,
"eval_wer": 10.718358038768528,
"step": 4000
},
{
"epoch": 0.42,
"learning_rate": 2.4223157894736848e-05,
"loss": 0.0914,
"step": 4250
},
{
"epoch": 0.45,
"learning_rate": 2.3170526315789474e-05,
"loss": 0.0878,
"step": 4500
},
{
"epoch": 0.47,
"learning_rate": 2.2117894736842107e-05,
"loss": 0.0857,
"step": 4750
},
{
"epoch": 0.5,
"learning_rate": 2.106526315789474e-05,
"loss": 0.0871,
"step": 5000
},
{
"epoch": 0.5,
"eval_loss": 0.2906649112701416,
"eval_runtime": 53.0801,
"eval_samples_per_second": 1.884,
"eval_steps_per_second": 0.132,
"eval_wer": 9.464082098061574,
"step": 5000
},
{
"epoch": 0.53,
"learning_rate": 2.001263157894737e-05,
"loss": 0.0806,
"step": 5250
},
{
"epoch": 0.55,
"learning_rate": 1.896e-05,
"loss": 0.078,
"step": 5500
},
{
"epoch": 0.57,
"learning_rate": 1.7907368421052634e-05,
"loss": 0.0779,
"step": 5750
},
{
"epoch": 0.6,
"learning_rate": 1.6854736842105264e-05,
"loss": 0.0774,
"step": 6000
},
{
"epoch": 0.6,
"eval_loss": 0.3019491136074066,
"eval_runtime": 49.1546,
"eval_samples_per_second": 2.034,
"eval_steps_per_second": 0.142,
"eval_wer": 11.402508551881414,
"step": 6000
},
{
"epoch": 1.02,
"learning_rate": 1.5802105263157896e-05,
"loss": 0.0674,
"step": 6250
},
{
"epoch": 1.05,
"learning_rate": 1.4749473684210527e-05,
"loss": 0.0526,
"step": 6500
},
{
"epoch": 1.07,
"learning_rate": 1.369684210526316e-05,
"loss": 0.0451,
"step": 6750
},
{
"epoch": 1.1,
"learning_rate": 1.264421052631579e-05,
"loss": 0.041,
"step": 7000
},
{
"epoch": 1.1,
"eval_loss": 0.2897065281867981,
"eval_runtime": 42.2915,
"eval_samples_per_second": 2.365,
"eval_steps_per_second": 0.166,
"eval_wer": 9.007981755986318,
"step": 7000
},
{
"epoch": 1.12,
"learning_rate": 1.1591578947368422e-05,
"loss": 0.0381,
"step": 7250
},
{
"epoch": 1.15,
"learning_rate": 1.0538947368421053e-05,
"loss": 0.0365,
"step": 7500
},
{
"epoch": 1.17,
"learning_rate": 9.486315789473684e-06,
"loss": 0.0343,
"step": 7750
},
{
"epoch": 1.2,
"learning_rate": 8.433684210526317e-06,
"loss": 0.0306,
"step": 8000
},
{
"epoch": 1.2,
"eval_loss": 0.30130401253700256,
"eval_runtime": 48.5193,
"eval_samples_per_second": 2.061,
"eval_steps_per_second": 0.144,
"eval_wer": 7.639680729760548,
"step": 8000
},
{
"epoch": 1.22,
"learning_rate": 7.385263157894737e-06,
"loss": 0.0294,
"step": 8250
},
{
"epoch": 1.25,
"learning_rate": 6.332631578947369e-06,
"loss": 0.0288,
"step": 8500
},
{
"epoch": 1.27,
"learning_rate": 5.28e-06,
"loss": 0.0276,
"step": 8750
},
{
"epoch": 1.3,
"learning_rate": 4.227368421052631e-06,
"loss": 0.0279,
"step": 9000
},
{
"epoch": 1.3,
"eval_loss": 0.29579514265060425,
"eval_runtime": 42.0972,
"eval_samples_per_second": 2.375,
"eval_steps_per_second": 0.166,
"eval_wer": 9.122006841505131,
"step": 9000
},
{
"epoch": 1.32,
"learning_rate": 3.174736842105263e-06,
"loss": 0.0256,
"step": 9250
},
{
"epoch": 1.35,
"learning_rate": 2.1221052631578947e-06,
"loss": 0.0256,
"step": 9500
},
{
"epoch": 1.37,
"learning_rate": 1.0694736842105266e-06,
"loss": 0.0226,
"step": 9750
},
{
"epoch": 1.4,
"learning_rate": 1.6842105263157897e-08,
"loss": 0.0239,
"step": 10000
},
{
"epoch": 1.4,
"eval_loss": 0.29898393154144287,
"eval_runtime": 56.0138,
"eval_samples_per_second": 1.785,
"eval_steps_per_second": 0.125,
"eval_wer": 7.753705815279361,
"step": 10000
},
{
"epoch": 1.4,
"step": 10000,
"total_flos": 1.3851118024998912e+20,
"train_loss": 0.10624286437034607,
"train_runtime": 115360.2706,
"train_samples_per_second": 8.322,
"train_steps_per_second": 0.087
}
],
"max_steps": 10000,
"num_train_epochs": 9223372036854775807,
"total_flos": 1.3851118024998912e+20,
"trial_name": null,
"trial_params": null
}