|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.590163934426229, |
|
"global_step": 600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.5e-06, |
|
"loss": 3.5867, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-06, |
|
"loss": 3.5457, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.5e-06, |
|
"loss": 3.4513, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1e-05, |
|
"loss": 3.3432, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.25e-05, |
|
"loss": 3.3533, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 3.2806732654571533, |
|
"eval_runtime": 190.4728, |
|
"eval_samples_per_second": 25.426, |
|
"eval_steps_per_second": 0.798, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.5e-05, |
|
"loss": 3.2217, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.7500000000000002e-05, |
|
"loss": 3.1765, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2e-05, |
|
"loss": 3.1408, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.2499999999999998e-05, |
|
"loss": 3.1165, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.5e-05, |
|
"loss": 3.1709, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 3.1325438022613525, |
|
"eval_runtime": 192.4978, |
|
"eval_samples_per_second": 25.159, |
|
"eval_steps_per_second": 0.79, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.75e-05, |
|
"loss": 3.079, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3e-05, |
|
"loss": 3.0677, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 3.0656, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.5000000000000004e-05, |
|
"loss": 3.1463, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.75e-05, |
|
"loss": 3.0573, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 3.0614514350891113, |
|
"eval_runtime": 194.36, |
|
"eval_samples_per_second": 24.918, |
|
"eval_steps_per_second": 0.782, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 4e-05, |
|
"loss": 3.0511, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 4.25e-05, |
|
"loss": 3.0358, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 4.4999999999999996e-05, |
|
"loss": 3.0416, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 4.75e-05, |
|
"loss": 3.1053, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0314, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_loss": 3.0990231037139893, |
|
"eval_runtime": 198.3688, |
|
"eval_samples_per_second": 24.414, |
|
"eval_steps_per_second": 0.766, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 5.25e-05, |
|
"loss": 3.0309, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 5.5e-05, |
|
"loss": 3.0259, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 5.75e-05, |
|
"loss": 3.0998, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 6e-05, |
|
"loss": 3.0152, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 6.25e-05, |
|
"loss": 3.0129, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_loss": 3.039973497390747, |
|
"eval_runtime": 190.8567, |
|
"eval_samples_per_second": 25.375, |
|
"eval_steps_per_second": 0.796, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 3.0088, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 6.75e-05, |
|
"loss": 3.0051, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 7.000000000000001e-05, |
|
"loss": 3.073, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 7.25e-05, |
|
"loss": 3.0031, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 7.5e-05, |
|
"loss": 2.9964, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"eval_loss": 2.998962640762329, |
|
"eval_runtime": 193.5213, |
|
"eval_samples_per_second": 25.026, |
|
"eval_steps_per_second": 0.785, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
} |
|
], |
|
"max_steps": 4550, |
|
"num_train_epochs": 50, |
|
"total_flos": 9.32024224555606e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|