|
{ |
|
"best_metric": 20.77837333954789, |
|
"best_model_checkpoint": "./the-final-whisper/checkpoint-500", |
|
"epoch": 0.8237232289950577, |
|
"eval_steps": 125, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04118616144975288, |
|
"grad_norm": 52.4793701171875, |
|
"learning_rate": 4.2000000000000006e-07, |
|
"loss": 2.7503, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.08237232289950576, |
|
"grad_norm": 16.525333404541016, |
|
"learning_rate": 9.200000000000001e-07, |
|
"loss": 2.0994, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12355848434925865, |
|
"grad_norm": 14.271560668945312, |
|
"learning_rate": 1.42e-06, |
|
"loss": 1.3213, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16474464579901152, |
|
"grad_norm": 8.628070831298828, |
|
"learning_rate": 1.9200000000000003e-06, |
|
"loss": 0.7342, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20593080724876442, |
|
"grad_norm": 8.307537078857422, |
|
"learning_rate": 2.42e-06, |
|
"loss": 0.5674, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.20593080724876442, |
|
"eval_loss": 0.6090311408042908, |
|
"eval_runtime": 2805.8489, |
|
"eval_samples_per_second": 1.442, |
|
"eval_steps_per_second": 0.18, |
|
"eval_wer": 45.15497553017945, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2471169686985173, |
|
"grad_norm": 6.402714729309082, |
|
"learning_rate": 2.92e-06, |
|
"loss": 0.4853, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2883031301482702, |
|
"grad_norm": 7.138896942138672, |
|
"learning_rate": 3.4200000000000007e-06, |
|
"loss": 0.424, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.32948929159802304, |
|
"grad_norm": 6.78637170791626, |
|
"learning_rate": 3.920000000000001e-06, |
|
"loss": 0.3443, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.37067545304777594, |
|
"grad_norm": 6.042959690093994, |
|
"learning_rate": 4.42e-06, |
|
"loss": 0.2769, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.41186161449752884, |
|
"grad_norm": 2.8671882152557373, |
|
"learning_rate": 4.92e-06, |
|
"loss": 0.1545, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.41186161449752884, |
|
"eval_loss": 0.18588024377822876, |
|
"eval_runtime": 2810.3027, |
|
"eval_samples_per_second": 1.439, |
|
"eval_steps_per_second": 0.18, |
|
"eval_wer": 30.12351433232347, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.45304777594728174, |
|
"grad_norm": 4.210362911224365, |
|
"learning_rate": 5.420000000000001e-06, |
|
"loss": 0.094, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.4942339373970346, |
|
"grad_norm": 3.7346065044403076, |
|
"learning_rate": 5.92e-06, |
|
"loss": 0.0858, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5354200988467874, |
|
"grad_norm": 2.7393386363983154, |
|
"learning_rate": 6.42e-06, |
|
"loss": 0.0745, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.5766062602965404, |
|
"grad_norm": 2.8941423892974854, |
|
"learning_rate": 6.92e-06, |
|
"loss": 0.0631, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6177924217462932, |
|
"grad_norm": 3.1176912784576416, |
|
"learning_rate": 7.420000000000001e-06, |
|
"loss": 0.0658, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.6177924217462932, |
|
"eval_loss": 0.1334521770477295, |
|
"eval_runtime": 2821.0386, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.179, |
|
"eval_wer": 22.91773479375437, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.6589785831960461, |
|
"grad_norm": 3.2934091091156006, |
|
"learning_rate": 7.92e-06, |
|
"loss": 0.0552, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.700164744645799, |
|
"grad_norm": 3.6386263370513916, |
|
"learning_rate": 8.42e-06, |
|
"loss": 0.0529, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.7413509060955519, |
|
"grad_norm": 1.1384811401367188, |
|
"learning_rate": 8.920000000000001e-06, |
|
"loss": 0.0418, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7825370675453048, |
|
"grad_norm": 4.086984634399414, |
|
"learning_rate": 9.42e-06, |
|
"loss": 0.0493, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.8237232289950577, |
|
"grad_norm": 2.331613540649414, |
|
"learning_rate": 9.920000000000002e-06, |
|
"loss": 0.0378, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8237232289950577, |
|
"eval_loss": 0.12090734392404556, |
|
"eval_runtime": 2820.3109, |
|
"eval_samples_per_second": 1.434, |
|
"eval_steps_per_second": 0.179, |
|
"eval_wer": 20.77837333954789, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 250, |
|
"total_flos": 2.30868320256e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|