freddyw's picture
End of training
0b3cdb9
raw
history blame
15.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 40.0,
"global_step": 9200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.43,
"learning_rate": 3.675e-06,
"loss": 10.3748,
"step": 100
},
{
"epoch": 0.87,
"learning_rate": 7.35e-06,
"loss": 9.8141,
"step": 200
},
{
"epoch": 1.3,
"learning_rate": 1.1099999999999999e-05,
"loss": 5.6549,
"step": 300
},
{
"epoch": 1.74,
"learning_rate": 1.485e-05,
"loss": 3.4055,
"step": 400
},
{
"epoch": 2.17,
"learning_rate": 1.8599999999999998e-05,
"loss": 3.086,
"step": 500
},
{
"epoch": 2.17,
"eval_loss": 3.0773141384124756,
"eval_runtime": 54.4742,
"eval_samples_per_second": 15.145,
"eval_steps_per_second": 0.955,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 2.61,
"learning_rate": 2.2349999999999998e-05,
"loss": 2.9844,
"step": 600
},
{
"epoch": 3.04,
"learning_rate": 2.6099999999999997e-05,
"loss": 2.957,
"step": 700
},
{
"epoch": 3.48,
"learning_rate": 2.985e-05,
"loss": 2.912,
"step": 800
},
{
"epoch": 3.91,
"learning_rate": 3.36e-05,
"loss": 2.8929,
"step": 900
},
{
"epoch": 4.35,
"learning_rate": 3.735e-05,
"loss": 2.8532,
"step": 1000
},
{
"epoch": 4.35,
"eval_loss": 2.839279890060425,
"eval_runtime": 54.2914,
"eval_samples_per_second": 15.196,
"eval_steps_per_second": 0.958,
"eval_wer": 1.0,
"step": 1000
},
{
"epoch": 4.78,
"learning_rate": 4.11e-05,
"loss": 2.595,
"step": 1100
},
{
"epoch": 5.22,
"learning_rate": 4.484999999999999e-05,
"loss": 1.9229,
"step": 1200
},
{
"epoch": 5.65,
"learning_rate": 4.8599999999999995e-05,
"loss": 1.3521,
"step": 1300
},
{
"epoch": 6.09,
"learning_rate": 5.234999999999999e-05,
"loss": 1.0772,
"step": 1400
},
{
"epoch": 6.52,
"learning_rate": 5.6099999999999995e-05,
"loss": 0.9738,
"step": 1500
},
{
"epoch": 6.52,
"eval_loss": 0.728294849395752,
"eval_runtime": 54.3803,
"eval_samples_per_second": 15.171,
"eval_steps_per_second": 0.956,
"eval_wer": 0.4889855346506571,
"step": 1500
},
{
"epoch": 6.96,
"learning_rate": 5.985e-05,
"loss": 0.891,
"step": 1600
},
{
"epoch": 7.39,
"learning_rate": 6.359999999999999e-05,
"loss": 0.7848,
"step": 1700
},
{
"epoch": 7.83,
"learning_rate": 6.735e-05,
"loss": 0.7527,
"step": 1800
},
{
"epoch": 8.26,
"learning_rate": 7.11e-05,
"loss": 0.7279,
"step": 1900
},
{
"epoch": 8.7,
"learning_rate": 7.484999999999999e-05,
"loss": 0.6763,
"step": 2000
},
{
"epoch": 8.7,
"eval_loss": 0.5340386033058167,
"eval_runtime": 53.7943,
"eval_samples_per_second": 15.336,
"eval_steps_per_second": 0.967,
"eval_wer": 0.36617188238630993,
"step": 2000
},
{
"epoch": 9.13,
"learning_rate": 7.4e-05,
"loss": 0.6165,
"step": 2100
},
{
"epoch": 9.57,
"learning_rate": 7.295833333333332e-05,
"loss": 0.6021,
"step": 2200
},
{
"epoch": 10.0,
"learning_rate": 7.191666666666666e-05,
"loss": 0.5904,
"step": 2300
},
{
"epoch": 10.43,
"learning_rate": 7.087499999999999e-05,
"loss": 0.5469,
"step": 2400
},
{
"epoch": 10.87,
"learning_rate": 6.983333333333333e-05,
"loss": 0.5303,
"step": 2500
},
{
"epoch": 10.87,
"eval_loss": 0.45213818550109863,
"eval_runtime": 53.7095,
"eval_samples_per_second": 15.36,
"eval_steps_per_second": 0.968,
"eval_wer": 0.31398317103148343,
"step": 2500
},
{
"epoch": 11.3,
"learning_rate": 6.879166666666667e-05,
"loss": 0.5061,
"step": 2600
},
{
"epoch": 11.74,
"learning_rate": 6.775e-05,
"loss": 0.4928,
"step": 2700
},
{
"epoch": 12.17,
"learning_rate": 6.670833333333333e-05,
"loss": 0.4693,
"step": 2800
},
{
"epoch": 12.61,
"learning_rate": 6.566666666666666e-05,
"loss": 0.4733,
"step": 2900
},
{
"epoch": 13.04,
"learning_rate": 6.4625e-05,
"loss": 0.4765,
"step": 3000
},
{
"epoch": 13.04,
"eval_loss": 0.4181167483329773,
"eval_runtime": 53.0777,
"eval_samples_per_second": 15.543,
"eval_steps_per_second": 0.98,
"eval_wer": 0.2853361066464971,
"step": 3000
},
{
"epoch": 13.48,
"learning_rate": 6.358333333333332e-05,
"loss": 0.4396,
"step": 3100
},
{
"epoch": 13.91,
"learning_rate": 6.254166666666666e-05,
"loss": 0.4359,
"step": 3200
},
{
"epoch": 14.35,
"learning_rate": 6.149999999999999e-05,
"loss": 0.4173,
"step": 3300
},
{
"epoch": 14.78,
"learning_rate": 6.045833333333333e-05,
"loss": 0.408,
"step": 3400
},
{
"epoch": 15.22,
"learning_rate": 5.941666666666666e-05,
"loss": 0.4219,
"step": 3500
},
{
"epoch": 15.22,
"eval_loss": 0.41555172204971313,
"eval_runtime": 52.8273,
"eval_samples_per_second": 15.617,
"eval_steps_per_second": 0.984,
"eval_wer": 0.2933724118370048,
"step": 3500
},
{
"epoch": 15.65,
"learning_rate": 5.8374999999999996e-05,
"loss": 0.4022,
"step": 3600
},
{
"epoch": 16.09,
"learning_rate": 5.733333333333333e-05,
"loss": 0.3932,
"step": 3700
},
{
"epoch": 16.52,
"learning_rate": 5.629166666666666e-05,
"loss": 0.374,
"step": 3800
},
{
"epoch": 16.96,
"learning_rate": 5.5249999999999994e-05,
"loss": 0.3697,
"step": 3900
},
{
"epoch": 17.39,
"learning_rate": 5.420833333333333e-05,
"loss": 0.3564,
"step": 4000
},
{
"epoch": 17.39,
"eval_loss": 0.3925444483757019,
"eval_runtime": 54.1337,
"eval_samples_per_second": 15.24,
"eval_steps_per_second": 0.961,
"eval_wer": 0.2509218114777347,
"step": 4000
},
{
"epoch": 17.83,
"learning_rate": 5.316666666666666e-05,
"loss": 0.3784,
"step": 4100
},
{
"epoch": 18.26,
"learning_rate": 5.212499999999999e-05,
"loss": 0.3805,
"step": 4200
},
{
"epoch": 18.7,
"learning_rate": 5.1083333333333326e-05,
"loss": 0.3419,
"step": 4300
},
{
"epoch": 19.13,
"learning_rate": 5.0041666666666666e-05,
"loss": 0.3344,
"step": 4400
},
{
"epoch": 19.57,
"learning_rate": 4.899999999999999e-05,
"loss": 0.3282,
"step": 4500
},
{
"epoch": 19.57,
"eval_loss": 0.3824474811553955,
"eval_runtime": 54.4571,
"eval_samples_per_second": 15.15,
"eval_steps_per_second": 0.955,
"eval_wer": 0.24203460338470265,
"step": 4500
},
{
"epoch": 20.0,
"learning_rate": 4.795833333333333e-05,
"loss": 0.3447,
"step": 4600
},
{
"epoch": 20.43,
"learning_rate": 4.691666666666666e-05,
"loss": 0.3308,
"step": 4700
},
{
"epoch": 20.87,
"learning_rate": 4.5875e-05,
"loss": 0.3056,
"step": 4800
},
{
"epoch": 21.3,
"learning_rate": 4.483333333333333e-05,
"loss": 0.3069,
"step": 4900
},
{
"epoch": 21.74,
"learning_rate": 4.379166666666666e-05,
"loss": 0.3118,
"step": 5000
},
{
"epoch": 21.74,
"eval_loss": 0.36364665627479553,
"eval_runtime": 52.4627,
"eval_samples_per_second": 15.725,
"eval_steps_per_second": 0.991,
"eval_wer": 0.2354164696984022,
"step": 5000
},
{
"epoch": 22.17,
"learning_rate": 4.2749999999999996e-05,
"loss": 0.3108,
"step": 5100
},
{
"epoch": 22.61,
"learning_rate": 4.170833333333333e-05,
"loss": 0.3034,
"step": 5200
},
{
"epoch": 23.04,
"learning_rate": 4.066666666666667e-05,
"loss": 0.3132,
"step": 5300
},
{
"epoch": 23.48,
"learning_rate": 3.9624999999999994e-05,
"loss": 0.3024,
"step": 5400
},
{
"epoch": 23.91,
"learning_rate": 3.8583333333333334e-05,
"loss": 0.2919,
"step": 5500
},
{
"epoch": 23.91,
"eval_loss": 0.3614845275878906,
"eval_runtime": 52.502,
"eval_samples_per_second": 15.714,
"eval_steps_per_second": 0.99,
"eval_wer": 0.22813652264347167,
"step": 5500
},
{
"epoch": 24.35,
"learning_rate": 3.754166666666666e-05,
"loss": 0.2868,
"step": 5600
},
{
"epoch": 24.78,
"learning_rate": 3.65e-05,
"loss": 0.2929,
"step": 5700
},
{
"epoch": 25.22,
"learning_rate": 3.545833333333333e-05,
"loss": 0.2956,
"step": 5800
},
{
"epoch": 25.65,
"learning_rate": 3.4416666666666665e-05,
"loss": 0.2818,
"step": 5900
},
{
"epoch": 26.09,
"learning_rate": 3.3375e-05,
"loss": 0.2961,
"step": 6000
},
{
"epoch": 26.09,
"eval_loss": 0.35476088523864746,
"eval_runtime": 52.7192,
"eval_samples_per_second": 15.649,
"eval_steps_per_second": 0.986,
"eval_wer": 0.2254892691689515,
"step": 6000
},
{
"epoch": 26.52,
"learning_rate": 3.233333333333333e-05,
"loss": 0.2816,
"step": 6100
},
{
"epoch": 26.96,
"learning_rate": 3.1291666666666664e-05,
"loss": 0.2788,
"step": 6200
},
{
"epoch": 27.39,
"learning_rate": 3.0249999999999997e-05,
"loss": 0.272,
"step": 6300
},
{
"epoch": 27.83,
"learning_rate": 2.920833333333333e-05,
"loss": 0.2756,
"step": 6400
},
{
"epoch": 28.26,
"learning_rate": 2.8166666666666662e-05,
"loss": 0.284,
"step": 6500
},
{
"epoch": 28.26,
"eval_loss": 0.3526020646095276,
"eval_runtime": 52.8188,
"eval_samples_per_second": 15.619,
"eval_steps_per_second": 0.984,
"eval_wer": 0.2208565755885412,
"step": 6500
},
{
"epoch": 28.7,
"learning_rate": 2.7125e-05,
"loss": 0.2721,
"step": 6600
},
{
"epoch": 29.13,
"learning_rate": 2.608333333333333e-05,
"loss": 0.2601,
"step": 6700
},
{
"epoch": 29.57,
"learning_rate": 2.5041666666666664e-05,
"loss": 0.2462,
"step": 6800
},
{
"epoch": 30.0,
"learning_rate": 2.3999999999999997e-05,
"loss": 0.271,
"step": 6900
},
{
"epoch": 30.43,
"learning_rate": 2.295833333333333e-05,
"loss": 0.2566,
"step": 7000
},
{
"epoch": 30.43,
"eval_loss": 0.35256850719451904,
"eval_runtime": 52.6434,
"eval_samples_per_second": 15.671,
"eval_steps_per_second": 0.988,
"eval_wer": 0.22047839652075257,
"step": 7000
},
{
"epoch": 30.87,
"learning_rate": 2.1916666666666663e-05,
"loss": 0.2612,
"step": 7100
},
{
"epoch": 31.3,
"learning_rate": 2.0874999999999996e-05,
"loss": 0.2524,
"step": 7200
},
{
"epoch": 31.74,
"learning_rate": 1.983333333333333e-05,
"loss": 0.254,
"step": 7300
},
{
"epoch": 32.17,
"learning_rate": 1.879166666666666e-05,
"loss": 0.2446,
"step": 7400
},
{
"epoch": 32.61,
"learning_rate": 1.7749999999999998e-05,
"loss": 0.2422,
"step": 7500
},
{
"epoch": 32.61,
"eval_loss": 0.35690072178840637,
"eval_runtime": 53.8708,
"eval_samples_per_second": 15.314,
"eval_steps_per_second": 0.965,
"eval_wer": 0.2172638744445495,
"step": 7500
},
{
"epoch": 33.04,
"learning_rate": 1.670833333333333e-05,
"loss": 0.2549,
"step": 7600
},
{
"epoch": 33.48,
"learning_rate": 1.5666666666666667e-05,
"loss": 0.2547,
"step": 7700
},
{
"epoch": 33.91,
"learning_rate": 1.4625e-05,
"loss": 0.2553,
"step": 7800
},
{
"epoch": 34.35,
"learning_rate": 1.3583333333333333e-05,
"loss": 0.2445,
"step": 7900
},
{
"epoch": 34.78,
"learning_rate": 1.2541666666666665e-05,
"loss": 0.2472,
"step": 8000
},
{
"epoch": 34.78,
"eval_loss": 0.35918259620666504,
"eval_runtime": 52.3221,
"eval_samples_per_second": 15.768,
"eval_steps_per_second": 0.994,
"eval_wer": 0.21660206107591945,
"step": 8000
},
{
"epoch": 35.22,
"learning_rate": 1.1499999999999998e-05,
"loss": 0.2608,
"step": 8100
},
{
"epoch": 35.65,
"learning_rate": 1.0458333333333333e-05,
"loss": 0.2482,
"step": 8200
},
{
"epoch": 36.09,
"learning_rate": 9.416666666666666e-06,
"loss": 0.2464,
"step": 8300
},
{
"epoch": 36.52,
"learning_rate": 8.374999999999999e-06,
"loss": 0.2347,
"step": 8400
},
{
"epoch": 36.96,
"learning_rate": 7.333333333333333e-06,
"loss": 0.2337,
"step": 8500
},
{
"epoch": 36.96,
"eval_loss": 0.36245495080947876,
"eval_runtime": 54.1316,
"eval_samples_per_second": 15.241,
"eval_steps_per_second": 0.961,
"eval_wer": 0.21716932967760236,
"step": 8500
},
{
"epoch": 37.39,
"learning_rate": 6.291666666666666e-06,
"loss": 0.2392,
"step": 8600
},
{
"epoch": 37.83,
"learning_rate": 5.25e-06,
"loss": 0.2347,
"step": 8700
},
{
"epoch": 38.26,
"learning_rate": 4.208333333333333e-06,
"loss": 0.2455,
"step": 8800
},
{
"epoch": 38.7,
"learning_rate": 3.1666666666666663e-06,
"loss": 0.243,
"step": 8900
},
{
"epoch": 39.13,
"learning_rate": 2.1249999999999996e-06,
"loss": 0.2315,
"step": 9000
},
{
"epoch": 39.13,
"eval_loss": 0.3579612374305725,
"eval_runtime": 52.3246,
"eval_samples_per_second": 15.767,
"eval_steps_per_second": 0.994,
"eval_wer": 0.21546752387255366,
"step": 9000
},
{
"epoch": 39.57,
"learning_rate": 1.0833333333333333e-06,
"loss": 0.2489,
"step": 9100
},
{
"epoch": 40.0,
"learning_rate": 4.166666666666666e-08,
"loss": 0.227,
"step": 9200
},
{
"epoch": 40.0,
"step": 9200,
"total_flos": 6.4078116974997864e+19,
"train_loss": 0.896209309619406,
"train_runtime": 25745.3803,
"train_samples_per_second": 11.426,
"train_steps_per_second": 0.357
}
],
"max_steps": 9200,
"num_train_epochs": 40,
"total_flos": 6.4078116974997864e+19,
"trial_name": null,
"trial_params": null
}