|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 300, |
|
"global_step": 7070, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4243281471004243, |
|
"eval_loss": 5.990268707275391, |
|
"eval_runtime": 138.1214, |
|
"eval_samples_per_second": 40.949, |
|
"eval_steps_per_second": 5.119, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"grad_norm": 2.022184371948242, |
|
"learning_rate": 4.9500000000000004e-05, |
|
"loss": 7.061, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8486562942008486, |
|
"eval_loss": 3.0451483726501465, |
|
"eval_runtime": 135.2034, |
|
"eval_samples_per_second": 41.833, |
|
"eval_steps_per_second": 5.229, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.272984441301273, |
|
"eval_loss": 2.9642043113708496, |
|
"eval_runtime": 136.8192, |
|
"eval_samples_per_second": 41.339, |
|
"eval_steps_per_second": 5.167, |
|
"eval_wer": 1.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"grad_norm": 0.7561541795730591, |
|
"learning_rate": 4.624048706240488e-05, |
|
"loss": 3.0081, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6973125884016973, |
|
"eval_loss": 2.956415891647339, |
|
"eval_runtime": 136.6446, |
|
"eval_samples_per_second": 41.392, |
|
"eval_steps_per_second": 5.174, |
|
"eval_wer": 1.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"grad_norm": 0.3668934106826782, |
|
"learning_rate": 4.245053272450533e-05, |
|
"loss": 2.9733, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"eval_loss": 2.947998285293579, |
|
"eval_runtime": 136.4831, |
|
"eval_samples_per_second": 41.441, |
|
"eval_steps_per_second": 5.18, |
|
"eval_wer": 1.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.545968882602546, |
|
"eval_loss": 2.945077657699585, |
|
"eval_runtime": 136.3573, |
|
"eval_samples_per_second": 41.479, |
|
"eval_steps_per_second": 5.185, |
|
"eval_wer": 1.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"grad_norm": 1.441468596458435, |
|
"learning_rate": 3.866057838660579e-05, |
|
"loss": 2.9454, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.9702970297029703, |
|
"eval_loss": 2.9147346019744873, |
|
"eval_runtime": 135.861, |
|
"eval_samples_per_second": 41.631, |
|
"eval_steps_per_second": 5.204, |
|
"eval_wer": 1.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.3946251768033946, |
|
"eval_loss": 2.9019417762756348, |
|
"eval_runtime": 136.0133, |
|
"eval_samples_per_second": 41.584, |
|
"eval_steps_per_second": 5.198, |
|
"eval_wer": 1.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"grad_norm": 0.46694883704185486, |
|
"learning_rate": 3.487062404870624e-05, |
|
"loss": 2.9064, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.818953323903819, |
|
"eval_loss": 2.884958505630493, |
|
"eval_runtime": 136.0891, |
|
"eval_samples_per_second": 41.561, |
|
"eval_steps_per_second": 5.195, |
|
"eval_wer": 1.0, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.243281471004243, |
|
"grad_norm": 0.4952280819416046, |
|
"learning_rate": 3.10882800608828e-05, |
|
"loss": 2.9048, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.243281471004243, |
|
"eval_loss": 2.8812334537506104, |
|
"eval_runtime": 136.4568, |
|
"eval_samples_per_second": 41.449, |
|
"eval_steps_per_second": 5.181, |
|
"eval_wer": 1.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.667609618104668, |
|
"eval_loss": 2.884371042251587, |
|
"eval_runtime": 136.787, |
|
"eval_samples_per_second": 41.349, |
|
"eval_steps_per_second": 5.169, |
|
"eval_wer": 1.0, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.9504950495049505, |
|
"grad_norm": 0.8865047097206116, |
|
"learning_rate": 2.7290715372907157e-05, |
|
"loss": 2.8965, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.091937765205092, |
|
"eval_loss": 2.9125277996063232, |
|
"eval_runtime": 136.3564, |
|
"eval_samples_per_second": 41.48, |
|
"eval_steps_per_second": 5.185, |
|
"eval_wer": 1.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.516265912305516, |
|
"eval_loss": 2.898144006729126, |
|
"eval_runtime": 136.0768, |
|
"eval_samples_per_second": 41.565, |
|
"eval_steps_per_second": 5.196, |
|
"eval_wer": 1.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 5.657708628005658, |
|
"grad_norm": 0.3529145121574402, |
|
"learning_rate": 2.3508371385083716e-05, |
|
"loss": 2.9261, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.9405940594059405, |
|
"eval_loss": 2.905318260192871, |
|
"eval_runtime": 136.6781, |
|
"eval_samples_per_second": 41.382, |
|
"eval_steps_per_second": 5.173, |
|
"eval_wer": 1.0, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.364922206506365, |
|
"grad_norm": 0.22229251265525818, |
|
"learning_rate": 1.971841704718417e-05, |
|
"loss": 2.9273, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.364922206506365, |
|
"eval_loss": 2.916677951812744, |
|
"eval_runtime": 136.7502, |
|
"eval_samples_per_second": 41.36, |
|
"eval_steps_per_second": 5.17, |
|
"eval_wer": 1.0, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.789250353606789, |
|
"eval_loss": 2.911259651184082, |
|
"eval_runtime": 136.484, |
|
"eval_samples_per_second": 41.441, |
|
"eval_steps_per_second": 5.18, |
|
"eval_wer": 1.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.072135785007072, |
|
"grad_norm": 1.7586228847503662, |
|
"learning_rate": 1.592846270928463e-05, |
|
"loss": 2.9302, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.2135785007072135, |
|
"eval_loss": 2.9133317470550537, |
|
"eval_runtime": 135.9523, |
|
"eval_samples_per_second": 41.603, |
|
"eval_steps_per_second": 5.2, |
|
"eval_wer": 1.0, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 7.637906647807638, |
|
"eval_loss": 2.921302080154419, |
|
"eval_runtime": 136.5228, |
|
"eval_samples_per_second": 41.429, |
|
"eval_steps_per_second": 5.179, |
|
"eval_wer": 1.0, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 7.779349363507779, |
|
"grad_norm": 0.6302638649940491, |
|
"learning_rate": 1.2146118721461187e-05, |
|
"loss": 2.9397, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.062234794908063, |
|
"eval_loss": 2.9251174926757812, |
|
"eval_runtime": 136.4335, |
|
"eval_samples_per_second": 41.456, |
|
"eval_steps_per_second": 5.182, |
|
"eval_wer": 1.0, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 8.486562942008486, |
|
"grad_norm": 0.5835816860198975, |
|
"learning_rate": 8.340943683409437e-06, |
|
"loss": 2.937, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.486562942008486, |
|
"eval_loss": 2.921030282974243, |
|
"eval_runtime": 136.1229, |
|
"eval_samples_per_second": 41.551, |
|
"eval_steps_per_second": 5.194, |
|
"eval_wer": 1.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.910891089108912, |
|
"eval_loss": 2.92145037651062, |
|
"eval_runtime": 137.1094, |
|
"eval_samples_per_second": 41.252, |
|
"eval_steps_per_second": 5.156, |
|
"eval_wer": 1.0, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 9.193776520509195, |
|
"grad_norm": 0.7211419939994812, |
|
"learning_rate": 4.558599695585997e-06, |
|
"loss": 2.9406, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.335219236209335, |
|
"eval_loss": 2.917142629623413, |
|
"eval_runtime": 136.4229, |
|
"eval_samples_per_second": 41.459, |
|
"eval_steps_per_second": 5.182, |
|
"eval_wer": 1.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 9.75954738330976, |
|
"eval_loss": 2.9176828861236572, |
|
"eval_runtime": 137.2177, |
|
"eval_samples_per_second": 41.219, |
|
"eval_steps_per_second": 5.152, |
|
"eval_wer": 1.0, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 9.900990099009901, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.762557077625571e-07, |
|
"loss": 2.9378, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 7070, |
|
"total_flos": 2.4662883830172946e+19, |
|
"train_loss": 3.228043903960534, |
|
"train_runtime": 12366.081, |
|
"train_samples_per_second": 18.293, |
|
"train_steps_per_second": 0.572 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 7070, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 600, |
|
"total_flos": 2.4662883830172946e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|