|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 5900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5.88e-05, |
|
"loss": 7.6789, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.0001188, |
|
"loss": 3.2952, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.00017879999999999998, |
|
"loss": 3.0495, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.0002388, |
|
"loss": 2.7948, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.0002988, |
|
"loss": 1.8597, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"eval_loss": 0.7730758786201477, |
|
"eval_runtime": 49.5825, |
|
"eval_samples_per_second": 16.135, |
|
"eval_steps_per_second": 16.135, |
|
"eval_wer": 0.7211251598238386, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.00029455555555555555, |
|
"loss": 1.4933, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.000289, |
|
"loss": 1.365, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 0.0002834444444444444, |
|
"loss": 1.3187, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 0.0002778888888888889, |
|
"loss": 1.2744, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 0.0002723333333333333, |
|
"loss": 1.2508, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"eval_loss": 0.5367600321769714, |
|
"eval_runtime": 48.7576, |
|
"eval_samples_per_second": 16.408, |
|
"eval_steps_per_second": 16.408, |
|
"eval_wer": 0.5989487143060094, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 0.0002667777777777778, |
|
"loss": 1.1856, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 20.34, |
|
"learning_rate": 0.0002612222222222222, |
|
"loss": 1.1821, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 22.03, |
|
"learning_rate": 0.00025566666666666663, |
|
"loss": 1.1284, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 23.73, |
|
"learning_rate": 0.0002501111111111111, |
|
"loss": 1.1204, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"learning_rate": 0.00024455555555555553, |
|
"loss": 1.1066, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"eval_loss": 0.5033634901046753, |
|
"eval_runtime": 48.524, |
|
"eval_samples_per_second": 16.487, |
|
"eval_steps_per_second": 16.487, |
|
"eval_wer": 0.553345645688308, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"learning_rate": 0.00023905555555555553, |
|
"loss": 1.0826, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 28.81, |
|
"learning_rate": 0.00023349999999999998, |
|
"loss": 1.0375, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 30.51, |
|
"learning_rate": 0.00022794444444444443, |
|
"loss": 1.0304, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 32.2, |
|
"learning_rate": 0.00022238888888888889, |
|
"loss": 1.0207, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 33.9, |
|
"learning_rate": 0.0002168333333333333, |
|
"loss": 1.0064, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 33.9, |
|
"eval_loss": 0.46856507658958435, |
|
"eval_runtime": 48.9718, |
|
"eval_samples_per_second": 16.336, |
|
"eval_steps_per_second": 16.336, |
|
"eval_wer": 0.5114362835630061, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 35.59, |
|
"learning_rate": 0.00021127777777777776, |
|
"loss": 0.9979, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 37.29, |
|
"learning_rate": 0.0002057222222222222, |
|
"loss": 0.9902, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 38.98, |
|
"learning_rate": 0.00020016666666666666, |
|
"loss": 0.9511, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 40.68, |
|
"learning_rate": 0.00019461111111111109, |
|
"loss": 0.9584, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 42.37, |
|
"learning_rate": 0.00018905555555555554, |
|
"loss": 0.9324, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 42.37, |
|
"eval_loss": 0.49271178245544434, |
|
"eval_runtime": 48.7262, |
|
"eval_samples_per_second": 16.418, |
|
"eval_steps_per_second": 16.418, |
|
"eval_wer": 0.5056115925557607, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 44.07, |
|
"learning_rate": 0.0001835, |
|
"loss": 0.9287, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 45.76, |
|
"learning_rate": 0.00017794444444444444, |
|
"loss": 0.9264, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 47.46, |
|
"learning_rate": 0.00017238888888888886, |
|
"loss": 0.9031, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 49.15, |
|
"learning_rate": 0.00016683333333333331, |
|
"loss": 0.8996, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 50.85, |
|
"learning_rate": 0.00016127777777777776, |
|
"loss": 0.876, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 50.85, |
|
"eval_loss": 0.4733906686306, |
|
"eval_runtime": 48.5334, |
|
"eval_samples_per_second": 16.484, |
|
"eval_steps_per_second": 16.484, |
|
"eval_wer": 0.4794715158403182, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 52.54, |
|
"learning_rate": 0.0001557222222222222, |
|
"loss": 0.8432, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 54.24, |
|
"learning_rate": 0.00015016666666666664, |
|
"loss": 0.8592, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 55.93, |
|
"learning_rate": 0.0001446111111111111, |
|
"loss": 0.8322, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 57.63, |
|
"learning_rate": 0.00013905555555555554, |
|
"loss": 0.8286, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 59.32, |
|
"learning_rate": 0.0001335, |
|
"loss": 0.8082, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 59.32, |
|
"eval_loss": 0.474797785282135, |
|
"eval_runtime": 48.7764, |
|
"eval_samples_per_second": 16.401, |
|
"eval_steps_per_second": 16.401, |
|
"eval_wer": 0.4798977127432874, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 61.02, |
|
"learning_rate": 0.00012794444444444442, |
|
"loss": 0.8101, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 62.71, |
|
"learning_rate": 0.0001223888888888889, |
|
"loss": 0.7805, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 64.41, |
|
"learning_rate": 0.00011683333333333332, |
|
"loss": 0.7741, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 66.1, |
|
"learning_rate": 0.00011127777777777777, |
|
"loss": 0.773, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 67.8, |
|
"learning_rate": 0.0001057222222222222, |
|
"loss": 0.7604, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 67.8, |
|
"eval_loss": 0.49494636058807373, |
|
"eval_runtime": 48.9958, |
|
"eval_samples_per_second": 16.328, |
|
"eval_steps_per_second": 16.328, |
|
"eval_wer": 0.46910072453473506, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 69.49, |
|
"learning_rate": 0.00010016666666666666, |
|
"loss": 0.7527, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 71.19, |
|
"learning_rate": 9.46111111111111e-05, |
|
"loss": 0.7354, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 72.88, |
|
"learning_rate": 8.905555555555555e-05, |
|
"loss": 0.7334, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 74.58, |
|
"learning_rate": 8.349999999999998e-05, |
|
"loss": 0.7286, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 76.27, |
|
"learning_rate": 7.794444444444445e-05, |
|
"loss": 0.7241, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 76.27, |
|
"eval_loss": 0.5090368390083313, |
|
"eval_runtime": 49.0851, |
|
"eval_samples_per_second": 16.298, |
|
"eval_steps_per_second": 16.298, |
|
"eval_wer": 0.4627077709901975, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 77.97, |
|
"learning_rate": 7.238888888888889e-05, |
|
"loss": 0.7065, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 79.66, |
|
"learning_rate": 6.683333333333332e-05, |
|
"loss": 0.6978, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 81.36, |
|
"learning_rate": 6.127777777777777e-05, |
|
"loss": 0.688, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 83.05, |
|
"learning_rate": 5.572222222222222e-05, |
|
"loss": 0.6725, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 84.75, |
|
"learning_rate": 5.016666666666666e-05, |
|
"loss": 0.6739, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 84.75, |
|
"eval_loss": 0.49671733379364014, |
|
"eval_runtime": 50.1856, |
|
"eval_samples_per_second": 15.941, |
|
"eval_steps_per_second": 15.941, |
|
"eval_wer": 0.4452336979684614, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 86.44, |
|
"learning_rate": 4.4611111111111106e-05, |
|
"loss": 0.6675, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 88.14, |
|
"learning_rate": 3.905555555555555e-05, |
|
"loss": 0.6548, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 89.83, |
|
"learning_rate": 3.3499999999999994e-05, |
|
"loss": 0.6465, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 91.53, |
|
"learning_rate": 2.8e-05, |
|
"loss": 0.6471, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 93.22, |
|
"learning_rate": 2.2444444444444444e-05, |
|
"loss": 0.6447, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 93.22, |
|
"eval_loss": 0.5071204900741577, |
|
"eval_runtime": 50.5031, |
|
"eval_samples_per_second": 15.841, |
|
"eval_steps_per_second": 15.841, |
|
"eval_wer": 0.4436709759909078, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 94.92, |
|
"learning_rate": 1.6888888888888888e-05, |
|
"loss": 0.6362, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 96.61, |
|
"learning_rate": 1.1333333333333332e-05, |
|
"loss": 0.6434, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 98.31, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 0.6269, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 2.222222222222222e-07, |
|
"loss": 0.6369, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 5900, |
|
"total_flos": 3.4027105250131104e+19, |
|
"train_loss": 1.1293389116707495, |
|
"train_runtime": 13204.1924, |
|
"train_samples_per_second": 14.064, |
|
"train_steps_per_second": 0.447 |
|
} |
|
], |
|
"max_steps": 5900, |
|
"num_train_epochs": 100, |
|
"total_flos": 3.4027105250131104e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|