|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 5.88e-05, |
|
"loss": 9.8087, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 0.0001188, |
|
"loss": 3.8006, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 0.00017879999999999998, |
|
"loss": 3.324, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 0.0002388, |
|
"loss": 3.2068, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"learning_rate": 0.0002988, |
|
"loss": 2.671, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"eval_loss": 1.3617517948150635, |
|
"eval_runtime": 14.6151, |
|
"eval_samples_per_second": 20.937, |
|
"eval_steps_per_second": 1.368, |
|
"eval_wer": 0.9498596068993181, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 0.00028270588235294116, |
|
"loss": 1.8697, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 31.82, |
|
"learning_rate": 0.00026505882352941175, |
|
"loss": 1.5472, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"learning_rate": 0.00024741176470588234, |
|
"loss": 1.3942, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 40.91, |
|
"learning_rate": 0.00022976470588235293, |
|
"loss": 1.2455, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"learning_rate": 0.00021211764705882352, |
|
"loss": 1.1599, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 45.45, |
|
"eval_loss": 0.6329962015151978, |
|
"eval_runtime": 14.7493, |
|
"eval_samples_per_second": 20.747, |
|
"eval_steps_per_second": 1.356, |
|
"eval_wer": 0.6626554352186121, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.00019447058823529408, |
|
"loss": 1.0693, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 54.55, |
|
"learning_rate": 0.00017682352941176467, |
|
"loss": 0.9951, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 59.09, |
|
"learning_rate": 0.00015917647058823529, |
|
"loss": 0.9336, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 63.64, |
|
"learning_rate": 0.00014152941176470588, |
|
"loss": 0.8805, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 68.18, |
|
"learning_rate": 0.00012388235294117647, |
|
"loss": 0.8252, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 68.18, |
|
"eval_loss": 0.6226372122764587, |
|
"eval_runtime": 15.3292, |
|
"eval_samples_per_second": 19.962, |
|
"eval_steps_per_second": 1.305, |
|
"eval_wer": 0.6425992779783394, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 72.73, |
|
"learning_rate": 0.00010623529411764705, |
|
"loss": 0.7796, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 77.27, |
|
"learning_rate": 8.858823529411763e-05, |
|
"loss": 0.733, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 81.82, |
|
"learning_rate": 7.094117647058823e-05, |
|
"loss": 0.7071, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 86.36, |
|
"learning_rate": 5.329411764705882e-05, |
|
"loss": 0.6584, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"learning_rate": 3.564705882352941e-05, |
|
"loss": 0.6424, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 90.91, |
|
"eval_loss": 0.6359109878540039, |
|
"eval_runtime": 14.8772, |
|
"eval_samples_per_second": 20.568, |
|
"eval_steps_per_second": 1.344, |
|
"eval_wer": 0.6040914560770156, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 95.45, |
|
"learning_rate": 1.7999999999999997e-05, |
|
"loss": 0.6096, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 3.5294117647058817e-07, |
|
"loss": 0.601, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 2200, |
|
"total_flos": 1.307344883713109e+19, |
|
"train_loss": 1.7937395373257723, |
|
"train_runtime": 4833.3951, |
|
"train_samples_per_second": 14.441, |
|
"train_steps_per_second": 0.455 |
|
} |
|
], |
|
"max_steps": 2200, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.307344883713109e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|