|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 17.999072356215212, |
|
"global_step": 6462, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 2.8832294940948486, |
|
"eval_runtime": 56.6037, |
|
"eval_samples_per_second": 9.045, |
|
"eval_steps_per_second": 1.131, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 1.1704888343811035, |
|
"eval_runtime": 56.4819, |
|
"eval_samples_per_second": 9.065, |
|
"eval_steps_per_second": 1.133, |
|
"eval_wer": 0.7788489628980426, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00029939999999999996, |
|
"loss": 3.3987, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 0.7739295959472656, |
|
"eval_runtime": 57.0638, |
|
"eval_samples_per_second": 8.972, |
|
"eval_steps_per_second": 1.122, |
|
"eval_wer": 0.5895413380075957, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_loss": 0.6045235395431519, |
|
"eval_runtime": 56.2609, |
|
"eval_samples_per_second": 9.1, |
|
"eval_steps_per_second": 1.138, |
|
"eval_wer": 0.4902132632193982, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.0002748909761824891, |
|
"loss": 0.8313, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 0.5234911441802979, |
|
"eval_runtime": 56.3102, |
|
"eval_samples_per_second": 9.092, |
|
"eval_steps_per_second": 1.137, |
|
"eval_wer": 0.43938066023955596, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_loss": 0.48238545656204224, |
|
"eval_runtime": 56.3832, |
|
"eval_samples_per_second": 9.081, |
|
"eval_steps_per_second": 1.135, |
|
"eval_wer": 0.4002337131171487, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"eval_loss": 0.4378102719783783, |
|
"eval_runtime": 56.2821, |
|
"eval_samples_per_second": 9.097, |
|
"eval_steps_per_second": 1.137, |
|
"eval_wer": 0.37540169442009935, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.00024973163367997313, |
|
"loss": 0.5342, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"eval_loss": 0.44333964586257935, |
|
"eval_runtime": 56.3381, |
|
"eval_samples_per_second": 9.088, |
|
"eval_steps_per_second": 1.136, |
|
"eval_wer": 0.36342389716622847, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_loss": 0.41031020879745483, |
|
"eval_runtime": 56.4477, |
|
"eval_samples_per_second": 9.07, |
|
"eval_steps_per_second": 1.134, |
|
"eval_wer": 0.34852468594799885, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.00022457229117745723, |
|
"loss": 0.3792, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_loss": 0.3816453218460083, |
|
"eval_runtime": 56.4551, |
|
"eval_samples_per_second": 9.069, |
|
"eval_steps_per_second": 1.134, |
|
"eval_wer": 0.33099620216184633, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 0.3952919840812683, |
|
"eval_runtime": 56.4457, |
|
"eval_samples_per_second": 9.071, |
|
"eval_steps_per_second": 1.134, |
|
"eval_wer": 0.32252410166520595, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"eval_loss": 0.39945441484451294, |
|
"eval_runtime": 56.6634, |
|
"eval_samples_per_second": 9.036, |
|
"eval_steps_per_second": 1.129, |
|
"eval_wer": 0.31317557697925796, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.00019941294867494128, |
|
"loss": 0.2924, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_loss": 0.3906857967376709, |
|
"eval_runtime": 56.3831, |
|
"eval_samples_per_second": 9.081, |
|
"eval_steps_per_second": 1.135, |
|
"eval_wer": 0.2930178206251826, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"eval_loss": 0.35171157121658325, |
|
"eval_runtime": 56.7082, |
|
"eval_samples_per_second": 9.029, |
|
"eval_steps_per_second": 1.129, |
|
"eval_wer": 0.2740286298568507, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 0.00017425360617242535, |
|
"loss": 0.2217, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"eval_loss": 0.33607447147369385, |
|
"eval_runtime": 56.5616, |
|
"eval_samples_per_second": 9.052, |
|
"eval_steps_per_second": 1.132, |
|
"eval_wer": 0.2591294186386211, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"eval_loss": 0.3340049088001251, |
|
"eval_runtime": 56.5741, |
|
"eval_samples_per_second": 9.05, |
|
"eval_steps_per_second": 1.131, |
|
"eval_wer": 0.2451066316096991, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"eval_loss": 0.3125685453414917, |
|
"eval_runtime": 56.4726, |
|
"eval_samples_per_second": 9.066, |
|
"eval_steps_per_second": 1.133, |
|
"eval_wer": 0.2448144902132632, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 0.00014909426366990943, |
|
"loss": 0.1714, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"eval_loss": 0.34412676095962524, |
|
"eval_runtime": 56.5983, |
|
"eval_samples_per_second": 9.046, |
|
"eval_steps_per_second": 1.131, |
|
"eval_wer": 0.2556237218813906, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"eval_loss": 0.3404456079006195, |
|
"eval_runtime": 56.5608, |
|
"eval_samples_per_second": 9.052, |
|
"eval_steps_per_second": 1.132, |
|
"eval_wer": 0.2521180251241601, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 0.0001239349211673935, |
|
"loss": 0.1395, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"eval_loss": 0.3728441894054413, |
|
"eval_runtime": 56.7823, |
|
"eval_samples_per_second": 9.017, |
|
"eval_steps_per_second": 1.127, |
|
"eval_wer": 0.25182588372772424, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"eval_loss": 0.3828706741333008, |
|
"eval_runtime": 56.4473, |
|
"eval_samples_per_second": 9.07, |
|
"eval_steps_per_second": 1.134, |
|
"eval_wer": 0.23955594507741748, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_loss": 0.3465881943702698, |
|
"eval_runtime": 56.3386, |
|
"eval_samples_per_second": 9.088, |
|
"eval_steps_per_second": 1.136, |
|
"eval_wer": 0.23605024832018698, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 9.877557866487755e-05, |
|
"loss": 0.1069, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"eval_loss": 0.3187991976737976, |
|
"eval_runtime": 56.3638, |
|
"eval_samples_per_second": 9.084, |
|
"eval_steps_per_second": 1.135, |
|
"eval_wer": 0.2240724510663161, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"eval_loss": 0.3395535349845886, |
|
"eval_runtime": 56.3257, |
|
"eval_samples_per_second": 9.09, |
|
"eval_steps_per_second": 1.136, |
|
"eval_wer": 0.21969033011977798, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"learning_rate": 7.361623616236162e-05, |
|
"loss": 0.0845, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"eval_loss": 0.3364916741847992, |
|
"eval_runtime": 56.321, |
|
"eval_samples_per_second": 9.091, |
|
"eval_steps_per_second": 1.136, |
|
"eval_wer": 0.2205667543090856, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"eval_loss": 0.3458584249019623, |
|
"eval_runtime": 56.3485, |
|
"eval_samples_per_second": 9.086, |
|
"eval_steps_per_second": 1.136, |
|
"eval_wer": 0.22085889570552147, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"eval_loss": 0.3429270386695862, |
|
"eval_runtime": 57.9716, |
|
"eval_samples_per_second": 8.832, |
|
"eval_steps_per_second": 1.104, |
|
"eval_wer": 0.2193981887233421, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 4.845689365984569e-05, |
|
"loss": 0.0675, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"eval_loss": 0.3433798849582672, |
|
"eval_runtime": 56.3488, |
|
"eval_samples_per_second": 9.086, |
|
"eval_steps_per_second": 1.136, |
|
"eval_wer": 0.2182296231375986, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 16.16, |
|
"eval_loss": 0.3434172570705414, |
|
"eval_runtime": 56.3181, |
|
"eval_samples_per_second": 9.091, |
|
"eval_steps_per_second": 1.136, |
|
"eval_wer": 0.20829681565877886, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 2.329755115732975e-05, |
|
"loss": 0.0561, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"eval_loss": 0.33747875690460205, |
|
"eval_runtime": 56.3763, |
|
"eval_samples_per_second": 9.082, |
|
"eval_steps_per_second": 1.135, |
|
"eval_wer": 0.20362255331580484, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 17.27, |
|
"eval_loss": 0.3445747494697571, |
|
"eval_runtime": 56.6382, |
|
"eval_samples_per_second": 9.04, |
|
"eval_steps_per_second": 1.13, |
|
"eval_wer": 0.19865614957639496, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 17.83, |
|
"eval_loss": 0.33622780442237854, |
|
"eval_runtime": 56.3321, |
|
"eval_samples_per_second": 9.089, |
|
"eval_steps_per_second": 1.136, |
|
"eval_wer": 0.19777972538708735, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"step": 6462, |
|
"total_flos": 3.4883692787272507e+19, |
|
"train_loss": 0.4896983008235647, |
|
"train_runtime": 62213.9785, |
|
"train_samples_per_second": 2.494, |
|
"train_steps_per_second": 0.104 |
|
} |
|
], |
|
"max_steps": 6462, |
|
"num_train_epochs": 18, |
|
"total_flos": 3.4883692787272507e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|