|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 99.98461538461538, |
|
"global_step": 3200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.00019600000000000002, |
|
"loss": 4.3333, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.00039600000000000003, |
|
"loss": 2.4826, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.000596, |
|
"loss": 2.2099, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"learning_rate": 0.000796, |
|
"loss": 2.1639, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 0.000996, |
|
"loss": 2.1851, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"eval_loss": 1.8067339658737183, |
|
"eval_runtime": 25.4413, |
|
"eval_samples_per_second": 20.007, |
|
"eval_steps_per_second": 2.516, |
|
"eval_wer": 0.9256004686584651, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 0.0009637037037037037, |
|
"loss": 2.1912, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 21.86, |
|
"learning_rate": 0.0009266666666666667, |
|
"loss": 2.1441, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 24.98, |
|
"learning_rate": 0.0008896296296296296, |
|
"loss": 2.1784, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 28.12, |
|
"learning_rate": 0.0008525925925925926, |
|
"loss": 2.1836, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"learning_rate": 0.0008155555555555556, |
|
"loss": 2.1586, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"eval_loss": 1.7883163690567017, |
|
"eval_runtime": 24.5739, |
|
"eval_samples_per_second": 20.713, |
|
"eval_steps_per_second": 2.604, |
|
"eval_wer": 0.9179847685998829, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 34.37, |
|
"learning_rate": 0.0007785185185185186, |
|
"loss": 2.1329, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 37.49, |
|
"learning_rate": 0.0007414814814814815, |
|
"loss": 2.1071, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 40.62, |
|
"learning_rate": 0.0007044444444444445, |
|
"loss": 2.0866, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 43.74, |
|
"learning_rate": 0.0006674074074074075, |
|
"loss": 2.0434, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 46.86, |
|
"learning_rate": 0.0006303703703703703, |
|
"loss": 2.0302, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 46.86, |
|
"eval_loss": 1.7570589780807495, |
|
"eval_runtime": 24.6634, |
|
"eval_samples_per_second": 20.638, |
|
"eval_steps_per_second": 2.595, |
|
"eval_wer": 0.9191564147627417, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 49.98, |
|
"learning_rate": 0.0005933333333333334, |
|
"loss": 2.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 53.12, |
|
"learning_rate": 0.0005562962962962963, |
|
"loss": 1.9953, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 56.25, |
|
"learning_rate": 0.0005192592592592593, |
|
"loss": 1.9243, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 59.37, |
|
"learning_rate": 0.0004822222222222222, |
|
"loss": 1.9093, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 62.49, |
|
"learning_rate": 0.0004451851851851852, |
|
"loss": 1.8706, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 62.49, |
|
"eval_loss": 1.631394624710083, |
|
"eval_runtime": 24.8239, |
|
"eval_samples_per_second": 20.504, |
|
"eval_steps_per_second": 2.578, |
|
"eval_wer": 0.8857644991212654, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 65.62, |
|
"learning_rate": 0.00040814814814814815, |
|
"loss": 1.8333, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 68.74, |
|
"learning_rate": 0.0003711111111111111, |
|
"loss": 1.7878, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 71.86, |
|
"learning_rate": 0.0003340740740740741, |
|
"loss": 1.7619, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 74.98, |
|
"learning_rate": 0.00029703703703703706, |
|
"loss": 1.7315, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 78.12, |
|
"learning_rate": 0.00026000000000000003, |
|
"loss": 1.7008, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 78.12, |
|
"eval_loss": 1.6130626201629639, |
|
"eval_runtime": 25.5604, |
|
"eval_samples_per_second": 19.914, |
|
"eval_steps_per_second": 2.504, |
|
"eval_wer": 0.8678968951376684, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 81.25, |
|
"learning_rate": 0.00022296296296296297, |
|
"loss": 1.6485, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 84.37, |
|
"learning_rate": 0.00018592592592592594, |
|
"loss": 1.6268, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 87.49, |
|
"learning_rate": 0.0001488888888888889, |
|
"loss": 1.5698, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 90.62, |
|
"learning_rate": 0.00011185185185185186, |
|
"loss": 1.5581, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 93.74, |
|
"learning_rate": 7.481481481481483e-05, |
|
"loss": 1.4982, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 93.74, |
|
"eval_loss": 1.6539617776870728, |
|
"eval_runtime": 24.6055, |
|
"eval_samples_per_second": 20.686, |
|
"eval_steps_per_second": 2.601, |
|
"eval_wer": 0.8649677797305214, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 96.86, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 1.4848, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 99.98, |
|
"learning_rate": 7.407407407407407e-07, |
|
"loss": 1.4475, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 99.98, |
|
"step": 3200, |
|
"total_flos": 3.685380703950235e+19, |
|
"train_loss": 1.9993535804748535, |
|
"train_runtime": 10079.8406, |
|
"train_samples_per_second": 10.268, |
|
"train_steps_per_second": 0.317 |
|
} |
|
], |
|
"max_steps": 3200, |
|
"num_train_epochs": 100, |
|
"total_flos": 3.685380703950235e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|