|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 149.9777777777778, |
|
"global_step": 3300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 4.3956000000000004e-05, |
|
"loss": 9.6799, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 3.611884593963623, |
|
"eval_runtime": 14.8755, |
|
"eval_samples_per_second": 22.588, |
|
"eval_steps_per_second": 2.823, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 8.8356e-05, |
|
"loss": 3.1332, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"eval_loss": 2.535223960876465, |
|
"eval_runtime": 14.5917, |
|
"eval_samples_per_second": 23.027, |
|
"eval_steps_per_second": 2.878, |
|
"eval_wer": 1.005, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 0.000132756, |
|
"loss": 1.0465, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"eval_loss": 0.6169007420539856, |
|
"eval_runtime": 14.7692, |
|
"eval_samples_per_second": 22.75, |
|
"eval_steps_per_second": 2.844, |
|
"eval_wer": 0.682, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"learning_rate": 0.000177156, |
|
"loss": 0.3452, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"eval_loss": 0.6572489142417908, |
|
"eval_runtime": 14.4275, |
|
"eval_samples_per_second": 23.289, |
|
"eval_steps_per_second": 2.911, |
|
"eval_wer": 0.607, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 45.44, |
|
"learning_rate": 0.000221556, |
|
"loss": 0.2575, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 45.44, |
|
"eval_loss": 0.6526808142662048, |
|
"eval_runtime": 14.9602, |
|
"eval_samples_per_second": 22.46, |
|
"eval_steps_per_second": 2.807, |
|
"eval_wer": 0.578, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 54.53, |
|
"learning_rate": 0.0002028886956521739, |
|
"loss": 0.2088, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 54.53, |
|
"eval_loss": 0.6828188300132751, |
|
"eval_runtime": 14.4501, |
|
"eval_samples_per_second": 23.252, |
|
"eval_steps_per_second": 2.907, |
|
"eval_wer": 0.551, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 63.62, |
|
"learning_rate": 0.00018358434782608698, |
|
"loss": 0.158, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 63.62, |
|
"eval_loss": 0.707419753074646, |
|
"eval_runtime": 14.2371, |
|
"eval_samples_per_second": 23.6, |
|
"eval_steps_per_second": 2.95, |
|
"eval_wer": 0.5575, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 72.71, |
|
"learning_rate": 0.00016428, |
|
"loss": 0.1309, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 72.71, |
|
"eval_loss": 0.6523335576057434, |
|
"eval_runtime": 14.2381, |
|
"eval_samples_per_second": 23.599, |
|
"eval_steps_per_second": 2.95, |
|
"eval_wer": 0.5595, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 81.8, |
|
"learning_rate": 0.00014497565217391306, |
|
"loss": 0.1074, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 81.8, |
|
"eval_loss": 0.7262120842933655, |
|
"eval_runtime": 14.2676, |
|
"eval_samples_per_second": 23.55, |
|
"eval_steps_per_second": 2.944, |
|
"eval_wer": 0.5415, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 90.89, |
|
"learning_rate": 0.0001256713043478261, |
|
"loss": 0.087, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 90.89, |
|
"eval_loss": 0.7198619246482849, |
|
"eval_runtime": 14.0317, |
|
"eval_samples_per_second": 23.946, |
|
"eval_steps_per_second": 2.993, |
|
"eval_wer": 0.521, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 99.98, |
|
"learning_rate": 0.00010636695652173913, |
|
"loss": 0.0711, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 99.98, |
|
"eval_loss": 0.7113156318664551, |
|
"eval_runtime": 14.2007, |
|
"eval_samples_per_second": 23.661, |
|
"eval_steps_per_second": 2.958, |
|
"eval_wer": 0.523, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 109.09, |
|
"learning_rate": 8.706260869565218e-05, |
|
"loss": 0.0601, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 109.09, |
|
"eval_loss": 0.6862908601760864, |
|
"eval_runtime": 13.9453, |
|
"eval_samples_per_second": 24.094, |
|
"eval_steps_per_second": 3.012, |
|
"eval_wer": 0.496, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 118.18, |
|
"learning_rate": 6.775826086956522e-05, |
|
"loss": 0.0451, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 118.18, |
|
"eval_loss": 0.6998496055603027, |
|
"eval_runtime": 14.078, |
|
"eval_samples_per_second": 23.867, |
|
"eval_steps_per_second": 2.983, |
|
"eval_wer": 0.483, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 127.27, |
|
"learning_rate": 4.8453913043478265e-05, |
|
"loss": 0.0378, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 127.27, |
|
"eval_loss": 0.6970936059951782, |
|
"eval_runtime": 14.3534, |
|
"eval_samples_per_second": 23.409, |
|
"eval_steps_per_second": 2.926, |
|
"eval_wer": 0.4615, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 136.36, |
|
"learning_rate": 2.9149565217391308e-05, |
|
"loss": 0.0319, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 136.36, |
|
"eval_loss": 0.7119385600090027, |
|
"eval_runtime": 14.2911, |
|
"eval_samples_per_second": 23.511, |
|
"eval_steps_per_second": 2.939, |
|
"eval_wer": 0.4475, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 145.44, |
|
"learning_rate": 9.845217391304348e-06, |
|
"loss": 0.0305, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 145.44, |
|
"eval_loss": 0.7181466221809387, |
|
"eval_runtime": 14.0233, |
|
"eval_samples_per_second": 23.96, |
|
"eval_steps_per_second": 2.995, |
|
"eval_wer": 0.459, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 149.98, |
|
"step": 3300, |
|
"total_flos": 1.7623169369275333e+19, |
|
"train_loss": 0.9359948108413003, |
|
"train_runtime": 8030.991, |
|
"train_samples_per_second": 13.392, |
|
"train_steps_per_second": 0.411 |
|
} |
|
], |
|
"max_steps": 3300, |
|
"num_train_epochs": 150, |
|
"total_flos": 1.7623169369275333e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|