|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 28.458498023715414, |
|
"global_step": 7200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 6.005, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 3.0256412029266357, |
|
"eval_runtime": 103.2564, |
|
"eval_samples_per_second": 7.302, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.0002873060648801128, |
|
"loss": 2.0338, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_loss": 0.6479684710502625, |
|
"eval_runtime": 104.5189, |
|
"eval_samples_per_second": 7.214, |
|
"eval_wer": 0.5284067231559939, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.0002703808180535966, |
|
"loss": 0.399, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_loss": 0.49810782074928284, |
|
"eval_runtime": 104.6121, |
|
"eval_samples_per_second": 7.208, |
|
"eval_wer": 0.4114460341714127, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.00025345557122708037, |
|
"loss": 0.2602, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"eval_loss": 0.49113723635673523, |
|
"eval_runtime": 105.9638, |
|
"eval_samples_per_second": 7.116, |
|
"eval_wer": 0.38324767328795667, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.00023653032440056415, |
|
"loss": 0.1991, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"eval_loss": 0.5186901092529297, |
|
"eval_runtime": 104.8564, |
|
"eval_samples_per_second": 7.191, |
|
"eval_wer": 0.3838033060147243, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 0.00021960507757404793, |
|
"loss": 0.159, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"eval_loss": 0.5159746408462524, |
|
"eval_runtime": 100.948, |
|
"eval_samples_per_second": 7.469, |
|
"eval_wer": 0.3706070287539936, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 0.0002026798307475317, |
|
"loss": 0.1378, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"eval_loss": 0.5382620692253113, |
|
"eval_runtime": 100.0461, |
|
"eval_samples_per_second": 7.537, |
|
"eval_wer": 0.36741214057507987, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 0.00018575458392101551, |
|
"loss": 0.116, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"eval_loss": 0.5114040970802307, |
|
"eval_runtime": 101.876, |
|
"eval_samples_per_second": 7.401, |
|
"eval_wer": 0.3560216696763439, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 0.0001688293370944993, |
|
"loss": 0.1022, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"eval_loss": 0.5726804733276367, |
|
"eval_runtime": 101.0522, |
|
"eval_samples_per_second": 7.461, |
|
"eval_wer": 0.36352271148770665, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 0.00015190409026798307, |
|
"loss": 0.0896, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"eval_loss": 0.5884941220283508, |
|
"eval_runtime": 101.7591, |
|
"eval_samples_per_second": 7.41, |
|
"eval_wer": 0.357271843311571, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 0.00013497884344146685, |
|
"loss": 0.0794, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"eval_loss": 0.5713954567909241, |
|
"eval_runtime": 102.4951, |
|
"eval_samples_per_second": 7.356, |
|
"eval_wer": 0.3504653424086679, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"learning_rate": 0.00011805359661495063, |
|
"loss": 0.0719, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_loss": 0.5819716453552246, |
|
"eval_runtime": 100.7538, |
|
"eval_samples_per_second": 7.484, |
|
"eval_wer": 0.34879844422836503, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"learning_rate": 0.0001011283497884344, |
|
"loss": 0.0602, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"eval_loss": 0.6186490654945374, |
|
"eval_runtime": 101.0143, |
|
"eval_samples_per_second": 7.464, |
|
"eval_wer": 0.3453257396860675, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 22.13, |
|
"learning_rate": 8.420310296191819e-05, |
|
"loss": 0.0604, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 22.13, |
|
"eval_loss": 0.6135568618774414, |
|
"eval_runtime": 101.9403, |
|
"eval_samples_per_second": 7.396, |
|
"eval_wer": 0.3394915960550076, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"learning_rate": 6.727785613540198e-05, |
|
"loss": 0.0532, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"eval_loss": 0.594706654548645, |
|
"eval_runtime": 100.6719, |
|
"eval_samples_per_second": 7.49, |
|
"eval_wer": 0.34324211696068896, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 25.3, |
|
"learning_rate": 5.0352609308885744e-05, |
|
"loss": 0.0457, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 25.3, |
|
"eval_loss": 0.6127145886421204, |
|
"eval_runtime": 101.5213, |
|
"eval_samples_per_second": 7.427, |
|
"eval_wer": 0.3442144742325323, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"learning_rate": 3.342736248236953e-05, |
|
"loss": 0.0478, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 26.88, |
|
"eval_loss": 0.6120516061782837, |
|
"eval_runtime": 102.6766, |
|
"eval_samples_per_second": 7.343, |
|
"eval_wer": 0.336157799694402, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 28.46, |
|
"learning_rate": 1.650211565585331e-05, |
|
"loss": 0.0406, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 28.46, |
|
"eval_loss": 0.6265799403190613, |
|
"eval_runtime": 104.4584, |
|
"eval_samples_per_second": 7.218, |
|
"eval_wer": 0.33282400333379636, |
|
"step": 7200 |
|
} |
|
], |
|
"max_steps": 7590, |
|
"num_train_epochs": 30, |
|
"total_flos": 4.099942107044385e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|