|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.013162988772745, |
|
"eval_steps": 100, |
|
"global_step": 2600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07742934572202866, |
|
"eval_loss": 3.5345706939697266, |
|
"eval_runtime": 152.1587, |
|
"eval_samples_per_second": 37.172, |
|
"eval_steps_per_second": 4.646, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1548586914440573, |
|
"eval_loss": 2.982929229736328, |
|
"eval_runtime": 150.2931, |
|
"eval_samples_per_second": 37.633, |
|
"eval_steps_per_second": 4.704, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23228803716608595, |
|
"eval_loss": 2.770493268966675, |
|
"eval_runtime": 150.0834, |
|
"eval_samples_per_second": 37.686, |
|
"eval_steps_per_second": 4.711, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3097173828881146, |
|
"eval_loss": 1.3696156740188599, |
|
"eval_runtime": 150.2358, |
|
"eval_samples_per_second": 37.647, |
|
"eval_steps_per_second": 4.706, |
|
"eval_wer": 0.8535090112500201, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"grad_norm": 3.0665018558502197, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 3.7305, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.38714672861014326, |
|
"eval_loss": 1.0936249494552612, |
|
"eval_runtime": 153.8749, |
|
"eval_samples_per_second": 36.757, |
|
"eval_steps_per_second": 4.595, |
|
"eval_wer": 0.7465295052237968, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4645760743321719, |
|
"eval_loss": 0.8456823825836182, |
|
"eval_runtime": 153.7133, |
|
"eval_samples_per_second": 36.796, |
|
"eval_steps_per_second": 4.599, |
|
"eval_wer": 0.6413313861116015, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5420054200542005, |
|
"eval_loss": 0.7860042452812195, |
|
"eval_runtime": 152.7402, |
|
"eval_samples_per_second": 37.03, |
|
"eval_steps_per_second": 4.629, |
|
"eval_wer": 0.5835566753863684, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6194347657762292, |
|
"eval_loss": 0.7366299629211426, |
|
"eval_runtime": 151.7301, |
|
"eval_samples_per_second": 37.277, |
|
"eval_steps_per_second": 4.66, |
|
"eval_wer": 0.563736739901462, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6968641114982579, |
|
"eval_loss": 0.7318999171257019, |
|
"eval_runtime": 152.5414, |
|
"eval_samples_per_second": 37.078, |
|
"eval_steps_per_second": 4.635, |
|
"eval_wer": 0.5493572563431818, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"grad_norm": 2.5090723037719727, |
|
"learning_rate": 0.00022928571428571426, |
|
"loss": 0.7504, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7742934572202865, |
|
"eval_loss": 0.6438552737236023, |
|
"eval_runtime": 151.1771, |
|
"eval_samples_per_second": 37.413, |
|
"eval_steps_per_second": 4.677, |
|
"eval_wer": 0.5104074721959204, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8517228029423152, |
|
"eval_loss": 0.6213911175727844, |
|
"eval_runtime": 151.6033, |
|
"eval_samples_per_second": 37.308, |
|
"eval_steps_per_second": 4.663, |
|
"eval_wer": 0.4759191795991077, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9291521486643438, |
|
"eval_loss": 0.5957211852073669, |
|
"eval_runtime": 152.1457, |
|
"eval_samples_per_second": 37.175, |
|
"eval_steps_per_second": 4.647, |
|
"eval_wer": 0.4628396270321452, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.0065814943863725, |
|
"eval_loss": 0.5717456340789795, |
|
"eval_runtime": 152.0809, |
|
"eval_samples_per_second": 37.191, |
|
"eval_steps_per_second": 4.649, |
|
"eval_wer": 0.43531639678387446, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.084010840108401, |
|
"eval_loss": 0.549981951713562, |
|
"eval_runtime": 151.522, |
|
"eval_samples_per_second": 37.328, |
|
"eval_steps_per_second": 4.666, |
|
"eval_wer": 0.4192197204345942, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"grad_norm": 0.4617447853088379, |
|
"learning_rate": 0.00015799999999999996, |
|
"loss": 0.5571, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.1614401858304297, |
|
"eval_loss": 0.5342110395431519, |
|
"eval_runtime": 152.7132, |
|
"eval_samples_per_second": 37.037, |
|
"eval_steps_per_second": 4.63, |
|
"eval_wer": 0.4073277591436504, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.2388695315524583, |
|
"eval_loss": 0.5206533670425415, |
|
"eval_runtime": 151.157, |
|
"eval_samples_per_second": 37.418, |
|
"eval_steps_per_second": 4.677, |
|
"eval_wer": 0.4023687631397346, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.316298877274487, |
|
"eval_loss": 0.5142083168029785, |
|
"eval_runtime": 151.9132, |
|
"eval_samples_per_second": 37.232, |
|
"eval_steps_per_second": 4.654, |
|
"eval_wer": 0.3968641170900804, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.3937282229965158, |
|
"eval_loss": 0.5083270072937012, |
|
"eval_runtime": 152.7068, |
|
"eval_samples_per_second": 37.038, |
|
"eval_steps_per_second": 4.63, |
|
"eval_wer": 0.39583701112163183, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.4711575687185443, |
|
"eval_loss": 0.4886321723461151, |
|
"eval_runtime": 152.3343, |
|
"eval_samples_per_second": 37.129, |
|
"eval_steps_per_second": 4.641, |
|
"eval_wer": 0.3825488276548282, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"grad_norm": 0.441532164812088, |
|
"learning_rate": 8.685714285714285e-05, |
|
"loss": 0.4603, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.5485869144405728, |
|
"eval_loss": 0.4732557237148285, |
|
"eval_runtime": 155.1846, |
|
"eval_samples_per_second": 36.447, |
|
"eval_steps_per_second": 4.556, |
|
"eval_wer": 0.3743480284379965, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"eval_loss": 0.4615860879421234, |
|
"eval_runtime": 155.1621, |
|
"eval_samples_per_second": 36.452, |
|
"eval_steps_per_second": 4.557, |
|
"eval_wer": 0.3618622715090433, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.7034456058846303, |
|
"eval_loss": 0.4535791277885437, |
|
"eval_runtime": 152.4739, |
|
"eval_samples_per_second": 37.095, |
|
"eval_steps_per_second": 4.637, |
|
"eval_wer": 0.3627288921699218, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.7808749516066589, |
|
"eval_loss": 0.44882732629776, |
|
"eval_runtime": 153.2132, |
|
"eval_samples_per_second": 36.916, |
|
"eval_steps_per_second": 4.614, |
|
"eval_wer": 0.3487185248190528, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.8583042973286876, |
|
"eval_loss": 0.4429319202899933, |
|
"eval_runtime": 154.0199, |
|
"eval_samples_per_second": 36.723, |
|
"eval_steps_per_second": 4.59, |
|
"eval_wer": 0.34810868065028644, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"grad_norm": 0.48590919375419617, |
|
"learning_rate": 1.557142857142857e-05, |
|
"loss": 0.4163, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.9357336430507162, |
|
"eval_loss": 0.4377308487892151, |
|
"eval_runtime": 154.6322, |
|
"eval_samples_per_second": 36.577, |
|
"eval_steps_per_second": 4.572, |
|
"eval_wer": 0.3418978992473239, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.013162988772745, |
|
"eval_loss": 0.4348967373371124, |
|
"eval_runtime": 154.2249, |
|
"eval_samples_per_second": 36.674, |
|
"eval_steps_per_second": 4.584, |
|
"eval_wer": 0.3390733578340903, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.013162988772745, |
|
"step": 2600, |
|
"total_flos": 1.1633941226063049e+19, |
|
"train_loss": 1.1519982103201059, |
|
"train_runtime": 7259.2244, |
|
"train_samples_per_second": 11.461, |
|
"train_steps_per_second": 0.358 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 400, |
|
"total_flos": 1.1633941226063049e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|