mms-300m-sakha / trainer_state.json
volodya-leveryev's picture
End of training
9b2efc8
raw
history blame
7.98 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 75.0,
"eval_steps": 100,
"global_step": 2100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.57,
"learning_rate": 2.5e-05,
"loss": 8.1262,
"step": 100
},
{
"epoch": 3.57,
"eval_loss": 4.0765790939331055,
"eval_runtime": 21.8341,
"eval_samples_per_second": 18.091,
"eval_steps_per_second": 0.595,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 7.14,
"learning_rate": 5e-05,
"loss": 3.5989,
"step": 200
},
{
"epoch": 7.14,
"eval_loss": 3.226550340652466,
"eval_runtime": 21.5714,
"eval_samples_per_second": 18.311,
"eval_steps_per_second": 0.603,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 10.71,
"learning_rate": 7.5e-05,
"loss": 3.0472,
"step": 300
},
{
"epoch": 10.71,
"eval_loss": 2.8889501094818115,
"eval_runtime": 21.7358,
"eval_samples_per_second": 18.173,
"eval_steps_per_second": 0.598,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 14.29,
"learning_rate": 0.0001,
"loss": 1.7327,
"step": 400
},
{
"epoch": 14.29,
"eval_loss": 0.6693879961967468,
"eval_runtime": 22.0147,
"eval_samples_per_second": 17.943,
"eval_steps_per_second": 0.591,
"eval_wer": 0.6622969506982046,
"step": 400
},
{
"epoch": 17.86,
"learning_rate": 0.000125,
"loss": 0.4239,
"step": 500
},
{
"epoch": 17.86,
"eval_loss": 0.4327137768268585,
"eval_runtime": 21.6438,
"eval_samples_per_second": 18.25,
"eval_steps_per_second": 0.601,
"eval_wer": 0.5483043602165859,
"step": 500
},
{
"epoch": 21.43,
"learning_rate": 0.00015,
"loss": 0.2337,
"step": 600
},
{
"epoch": 21.43,
"eval_loss": 0.38920775055885315,
"eval_runtime": 21.8228,
"eval_samples_per_second": 18.1,
"eval_steps_per_second": 0.596,
"eval_wer": 0.5195212311199772,
"step": 600
},
{
"epoch": 25.0,
"learning_rate": 0.000175,
"loss": 0.1724,
"step": 700
},
{
"epoch": 25.0,
"eval_loss": 0.391787052154541,
"eval_runtime": 21.5976,
"eval_samples_per_second": 18.289,
"eval_steps_per_second": 0.602,
"eval_wer": 0.5027073240239385,
"step": 700
},
{
"epoch": 28.57,
"learning_rate": 0.0002,
"loss": 0.1392,
"step": 800
},
{
"epoch": 28.57,
"eval_loss": 0.36863410472869873,
"eval_runtime": 21.5216,
"eval_samples_per_second": 18.354,
"eval_steps_per_second": 0.604,
"eval_wer": 0.4670846394984326,
"step": 800
},
{
"epoch": 32.14,
"learning_rate": 0.00022500000000000002,
"loss": 0.1196,
"step": 900
},
{
"epoch": 32.14,
"eval_loss": 0.3849872052669525,
"eval_runtime": 21.7525,
"eval_samples_per_second": 18.159,
"eval_steps_per_second": 0.598,
"eval_wer": 0.45397549159304645,
"step": 900
},
{
"epoch": 35.71,
"learning_rate": 0.00025,
"loss": 0.1095,
"step": 1000
},
{
"epoch": 35.71,
"eval_loss": 0.3586702346801758,
"eval_runtime": 21.8949,
"eval_samples_per_second": 18.041,
"eval_steps_per_second": 0.594,
"eval_wer": 0.4516956397834141,
"step": 1000
},
{
"epoch": 39.29,
"learning_rate": 0.000275,
"loss": 0.1026,
"step": 1100
},
{
"epoch": 39.29,
"eval_loss": 0.36062636971473694,
"eval_runtime": 21.8096,
"eval_samples_per_second": 18.111,
"eval_steps_per_second": 0.596,
"eval_wer": 0.44029638073525224,
"step": 1100
},
{
"epoch": 42.86,
"learning_rate": 0.0003,
"loss": 0.0972,
"step": 1200
},
{
"epoch": 42.86,
"eval_loss": 0.40814533829689026,
"eval_runtime": 21.9118,
"eval_samples_per_second": 18.027,
"eval_steps_per_second": 0.593,
"eval_wer": 0.43317184383015106,
"step": 1200
},
{
"epoch": 46.43,
"learning_rate": 0.00032500000000000004,
"loss": 0.0932,
"step": 1300
},
{
"epoch": 46.43,
"eval_loss": 0.3904629945755005,
"eval_runtime": 21.7505,
"eval_samples_per_second": 18.16,
"eval_steps_per_second": 0.598,
"eval_wer": 0.426902251353662,
"step": 1300
},
{
"epoch": 50.0,
"learning_rate": 0.00035,
"loss": 0.0897,
"step": 1400
},
{
"epoch": 50.0,
"eval_loss": 0.37547701597213745,
"eval_runtime": 22.1102,
"eval_samples_per_second": 17.865,
"eval_steps_per_second": 0.588,
"eval_wer": 0.42747221430607013,
"step": 1400
},
{
"epoch": 53.57,
"learning_rate": 0.000375,
"loss": 0.0846,
"step": 1500
},
{
"epoch": 53.57,
"eval_loss": 0.36823779344558716,
"eval_runtime": 21.896,
"eval_samples_per_second": 18.04,
"eval_steps_per_second": 0.594,
"eval_wer": 0.42091764035337703,
"step": 1500
},
{
"epoch": 57.14,
"learning_rate": 0.0004,
"loss": 0.0854,
"step": 1600
},
{
"epoch": 57.14,
"eval_loss": 0.3795730471611023,
"eval_runtime": 21.5795,
"eval_samples_per_second": 18.304,
"eval_steps_per_second": 0.602,
"eval_wer": 0.41635793673411226,
"step": 1600
},
{
"epoch": 60.71,
"learning_rate": 0.000425,
"loss": 0.0845,
"step": 1700
},
{
"epoch": 60.71,
"eval_loss": 0.35857513546943665,
"eval_runtime": 21.7949,
"eval_samples_per_second": 18.124,
"eval_steps_per_second": 0.596,
"eval_wer": 0.3941293815901966,
"step": 1700
},
{
"epoch": 64.29,
"learning_rate": 0.00045000000000000004,
"loss": 0.0854,
"step": 1800
},
{
"epoch": 64.29,
"eval_loss": 0.376005083322525,
"eval_runtime": 21.5391,
"eval_samples_per_second": 18.339,
"eval_steps_per_second": 0.604,
"eval_wer": 0.41664291821031635,
"step": 1800
},
{
"epoch": 67.86,
"learning_rate": 0.000475,
"loss": 0.0846,
"step": 1900
},
{
"epoch": 67.86,
"eval_loss": 0.3710671663284302,
"eval_runtime": 21.5817,
"eval_samples_per_second": 18.303,
"eval_steps_per_second": 0.602,
"eval_wer": 0.41208321459105157,
"step": 1900
},
{
"epoch": 71.43,
"learning_rate": 0.0005,
"loss": 0.0827,
"step": 2000
},
{
"epoch": 71.43,
"eval_loss": 0.40105244517326355,
"eval_runtime": 21.6021,
"eval_samples_per_second": 18.285,
"eval_steps_per_second": 0.602,
"eval_wer": 0.4160729552579082,
"step": 2000
},
{
"epoch": 75.0,
"learning_rate": 0.0,
"loss": 0.0665,
"step": 2100
},
{
"epoch": 75.0,
"eval_loss": 0.3478299379348755,
"eval_runtime": 21.8418,
"eval_samples_per_second": 18.085,
"eval_steps_per_second": 0.595,
"eval_wer": 0.36534625249358793,
"step": 2100
},
{
"epoch": 75.0,
"step": 2100,
"total_flos": 5.419723804611084e+19,
"train_loss": 0.8885498528253465,
"train_runtime": 26470.8294,
"train_samples_per_second": 10.064,
"train_steps_per_second": 0.079
}
],
"logging_steps": 100,
"max_steps": 2100,
"num_train_epochs": 75,
"save_steps": 100,
"total_flos": 5.419723804611084e+19,
"trial_name": null,
"trial_params": null
}