|
{ |
|
"best_metric": 53.62439467312349, |
|
"best_model_checkpoint": "./checkpoint-300", |
|
"epoch": 3.566371681415929, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3e-06, |
|
"loss": 4.1495, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 6.333333333333333e-06, |
|
"loss": 2.9287, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.666666666666667e-06, |
|
"loss": 2.0462, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.666666666666667e-06, |
|
"loss": 1.6138, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.296296296296296e-06, |
|
"loss": 1.3862, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.925925925925927e-06, |
|
"loss": 1.2604, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.555555555555556e-06, |
|
"loss": 1.1436, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.185185185185187e-06, |
|
"loss": 1.168, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 7.814814814814816e-06, |
|
"loss": 1.1041, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 7.444444444444445e-06, |
|
"loss": 0.9683, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_cer": 131.61659035460045, |
|
"eval_loss": 0.8811978697776794, |
|
"eval_runtime": 593.3196, |
|
"eval_samples_per_second": 0.863, |
|
"eval_steps_per_second": 0.431, |
|
"eval_wer": 139.37651331719127, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 7.074074074074074e-06, |
|
"loss": 0.909, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 6.703703703703704e-06, |
|
"loss": 0.9213, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 6.333333333333333e-06, |
|
"loss": 0.9092, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5.962962962962963e-06, |
|
"loss": 0.8481, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5.5925925925925926e-06, |
|
"loss": 0.8471, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5.2222222222222226e-06, |
|
"loss": 0.8504, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.851851851851852e-06, |
|
"loss": 0.8264, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 4.481481481481482e-06, |
|
"loss": 0.7236, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.111111111111111e-06, |
|
"loss": 0.6898, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.740740740740741e-06, |
|
"loss": 0.6848, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_cer": 151.33685371478225, |
|
"eval_loss": 0.7542899250984192, |
|
"eval_runtime": 551.6472, |
|
"eval_samples_per_second": 0.928, |
|
"eval_steps_per_second": 0.464, |
|
"eval_wer": 145.9972760290557, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.3703703703703705e-06, |
|
"loss": 0.7021, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3e-06, |
|
"loss": 0.6956, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.6296296296296297e-06, |
|
"loss": 0.629, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.2592592592592592e-06, |
|
"loss": 0.6661, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.888888888888889e-06, |
|
"loss": 0.6251, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.5185185185185186e-06, |
|
"loss": 0.6852, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.1481481481481482e-06, |
|
"loss": 0.5772, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 7.777777777777779e-07, |
|
"loss": 0.5592, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 4.074074074074075e-07, |
|
"loss": 0.5845, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 3.703703703703704e-08, |
|
"loss": 0.5548, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_cer": 22.68473647271147, |
|
"eval_loss": 0.6978507041931152, |
|
"eval_runtime": 832.954, |
|
"eval_samples_per_second": 0.615, |
|
"eval_steps_per_second": 0.307, |
|
"eval_wer": 53.62439467312349, |
|
"step": 300 |
|
} |
|
], |
|
"max_steps": 300, |
|
"num_train_epochs": 4, |
|
"total_flos": 2.79033223569408e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|