xls-r-300m-sv / trainer_state.json
chmanoj's picture
End of training
8954898
raw
history blame
5.93 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.997824510514866,
"global_step": 3440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.29,
"learning_rate": 5e-05,
"loss": 7.294,
"step": 100
},
{
"epoch": 0.58,
"learning_rate": 0.0001,
"loss": 3.2282,
"step": 200
},
{
"epoch": 0.87,
"learning_rate": 0.00015,
"loss": 3.0143,
"step": 300
},
{
"epoch": 1.16,
"learning_rate": 0.0002,
"loss": 2.9501,
"step": 400
},
{
"epoch": 1.45,
"learning_rate": 0.00025,
"loss": 2.6683,
"step": 500
},
{
"epoch": 1.45,
"eval_loss": 1.7697819471359253,
"eval_runtime": 250.7581,
"eval_samples_per_second": 18.424,
"eval_steps_per_second": 4.606,
"eval_wer": 1.0040690203756961,
"step": 500
},
{
"epoch": 1.74,
"learning_rate": 0.0003,
"loss": 2.1972,
"step": 600
},
{
"epoch": 2.03,
"learning_rate": 0.00035,
"loss": 2.0775,
"step": 700
},
{
"epoch": 2.32,
"learning_rate": 0.0004,
"loss": 1.9871,
"step": 800
},
{
"epoch": 2.61,
"learning_rate": 0.00045000000000000004,
"loss": 1.9768,
"step": 900
},
{
"epoch": 2.91,
"learning_rate": 0.0005,
"loss": 1.9548,
"step": 1000
},
{
"epoch": 2.91,
"eval_loss": 1.0889918804168701,
"eval_runtime": 243.6531,
"eval_samples_per_second": 18.961,
"eval_steps_per_second": 4.74,
"eval_wer": 0.8601847885945053,
"step": 1000
},
{
"epoch": 3.2,
"learning_rate": 0.00055,
"loss": 1.9588,
"step": 1100
},
{
"epoch": 3.49,
"learning_rate": 0.0006,
"loss": 1.9734,
"step": 1200
},
{
"epoch": 3.78,
"learning_rate": 0.0006495,
"loss": 1.9725,
"step": 1300
},
{
"epoch": 4.07,
"learning_rate": 0.0006995,
"loss": 1.9717,
"step": 1400
},
{
"epoch": 4.36,
"learning_rate": 0.0007495000000000001,
"loss": 1.9568,
"step": 1500
},
{
"epoch": 4.36,
"eval_loss": 1.0877875089645386,
"eval_runtime": 239.5705,
"eval_samples_per_second": 19.285,
"eval_steps_per_second": 4.821,
"eval_wer": 0.868016887964266,
"step": 1500
},
{
"epoch": 4.65,
"learning_rate": 0.0007995,
"loss": 1.9528,
"step": 1600
},
{
"epoch": 4.94,
"learning_rate": 0.0008495000000000001,
"loss": 1.9862,
"step": 1700
},
{
"epoch": 5.23,
"learning_rate": 0.0008995,
"loss": 1.9592,
"step": 1800
},
{
"epoch": 5.52,
"learning_rate": 0.0009495,
"loss": 1.938,
"step": 1900
},
{
"epoch": 5.81,
"learning_rate": 0.0009995000000000002,
"loss": 1.9497,
"step": 2000
},
{
"epoch": 5.81,
"eval_loss": 1.1500531435012817,
"eval_runtime": 242.7035,
"eval_samples_per_second": 19.036,
"eval_steps_per_second": 4.759,
"eval_wer": 0.8837728691182769,
"step": 2000
},
{
"epoch": 6.1,
"learning_rate": 0.00093125,
"loss": 1.9326,
"step": 2100
},
{
"epoch": 6.39,
"learning_rate": 0.0008618055555555557,
"loss": 1.8927,
"step": 2200
},
{
"epoch": 6.68,
"learning_rate": 0.0007923611111111111,
"loss": 1.8929,
"step": 2300
},
{
"epoch": 6.97,
"learning_rate": 0.0007229166666666666,
"loss": 1.8684,
"step": 2400
},
{
"epoch": 7.27,
"learning_rate": 0.0006534722222222223,
"loss": 1.8453,
"step": 2500
},
{
"epoch": 7.27,
"eval_loss": 1.0452075004577637,
"eval_runtime": 247.1245,
"eval_samples_per_second": 18.695,
"eval_steps_per_second": 4.674,
"eval_wer": 0.8417977115584654,
"step": 2500
},
{
"epoch": 7.56,
"learning_rate": 0.0005840277777777778,
"loss": 1.8081,
"step": 2600
},
{
"epoch": 7.85,
"learning_rate": 0.0005145833333333333,
"loss": 1.78,
"step": 2700
},
{
"epoch": 8.14,
"learning_rate": 0.00044513888888888885,
"loss": 1.7618,
"step": 2800
},
{
"epoch": 8.43,
"learning_rate": 0.00037569444444444445,
"loss": 1.7155,
"step": 2900
},
{
"epoch": 8.72,
"learning_rate": 0.00030625000000000004,
"loss": 1.6952,
"step": 3000
},
{
"epoch": 8.72,
"eval_loss": 0.9152895212173462,
"eval_runtime": 242.409,
"eval_samples_per_second": 19.059,
"eval_steps_per_second": 4.765,
"eval_wer": 0.7822615186930184,
"step": 3000
},
{
"epoch": 9.01,
"learning_rate": 0.00023680555555555556,
"loss": 1.6874,
"step": 3100
},
{
"epoch": 9.3,
"learning_rate": 0.0001673611111111111,
"loss": 1.6143,
"step": 3200
},
{
"epoch": 9.59,
"learning_rate": 9.791666666666667e-05,
"loss": 1.5948,
"step": 3300
},
{
"epoch": 9.88,
"learning_rate": 2.9166666666666666e-05,
"loss": 1.5723,
"step": 3400
},
{
"epoch": 10.0,
"step": 3440,
"total_flos": 1.01153307245466e+19,
"train_loss": 2.1586562755496,
"train_runtime": 12185.0109,
"train_samples_per_second": 9.052,
"train_steps_per_second": 0.282
}
],
"max_steps": 3440,
"num_train_epochs": 10,
"total_flos": 1.01153307245466e+19,
"trial_name": null,
"trial_params": null
}