|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 58.8235294117647, |
|
"global_step": 9000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 2.9419331550598145, |
|
"eval_runtime": 165.8041, |
|
"eval_samples_per_second": 11.122, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 9.774774774774775e-05, |
|
"loss": 5.732, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_loss": 2.8035287857055664, |
|
"eval_runtime": 163.2568, |
|
"eval_samples_per_second": 11.295, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_loss": 0.5059587955474854, |
|
"eval_runtime": 166.512, |
|
"eval_samples_per_second": 11.074, |
|
"eval_wer": 0.646839455528622, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 9.211711711711712e-05, |
|
"loss": 1.7477, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"eval_loss": 0.31517693400382996, |
|
"eval_runtime": 168.535, |
|
"eval_samples_per_second": 10.941, |
|
"eval_wer": 0.49533842998321836, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 8.64864864864865e-05, |
|
"loss": 0.4124, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_loss": 0.30525174736976624, |
|
"eval_runtime": 170.5049, |
|
"eval_samples_per_second": 10.815, |
|
"eval_wer": 0.4454596308036547, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"eval_loss": 0.27497437596321106, |
|
"eval_runtime": 171.8183, |
|
"eval_samples_per_second": 10.732, |
|
"eval_wer": 0.4267201193361924, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 8.085585585585586e-05, |
|
"loss": 0.2831, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"eval_loss": 0.28638383746147156, |
|
"eval_runtime": 174.9018, |
|
"eval_samples_per_second": 10.543, |
|
"eval_wer": 0.4133880290881969, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"eval_loss": 0.27328914403915405, |
|
"eval_runtime": 174.9487, |
|
"eval_samples_per_second": 10.54, |
|
"eval_wer": 0.40583628566101065, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"learning_rate": 7.522522522522523e-05, |
|
"loss": 0.2231, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"eval_loss": 0.29071566462516785, |
|
"eval_runtime": 177.3068, |
|
"eval_samples_per_second": 10.4, |
|
"eval_wer": 0.3988439306358382, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 6.95945945945946e-05, |
|
"loss": 0.1885, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"eval_loss": 0.2765745520591736, |
|
"eval_runtime": 177.2476, |
|
"eval_samples_per_second": 10.404, |
|
"eval_wer": 0.39912362483684505, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 21.57, |
|
"eval_loss": 0.28941160440444946, |
|
"eval_runtime": 180.7189, |
|
"eval_samples_per_second": 10.204, |
|
"eval_wer": 0.38355398098079435, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 22.88, |
|
"learning_rate": 6.396396396396397e-05, |
|
"loss": 0.1657, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 23.53, |
|
"eval_loss": 0.27626267075538635, |
|
"eval_runtime": 182.3178, |
|
"eval_samples_per_second": 10.114, |
|
"eval_wer": 0.38094350177139663, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 25.49, |
|
"eval_loss": 0.28250962495803833, |
|
"eval_runtime": 183.703, |
|
"eval_samples_per_second": 10.038, |
|
"eval_wer": 0.3820622785754242, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 26.14, |
|
"learning_rate": 5.833333333333334e-05, |
|
"loss": 0.1449, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"eval_loss": 0.27114051580429077, |
|
"eval_runtime": 183.7677, |
|
"eval_samples_per_second": 10.034, |
|
"eval_wer": 0.38140965877307476, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 29.41, |
|
"learning_rate": 5.27027027027027e-05, |
|
"loss": 0.1344, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 29.41, |
|
"eval_loss": 0.3000315725803375, |
|
"eval_runtime": 185.7074, |
|
"eval_samples_per_second": 9.93, |
|
"eval_wer": 0.374230840947231, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 31.37, |
|
"eval_loss": 0.2914937734603882, |
|
"eval_runtime": 169.8087, |
|
"eval_samples_per_second": 10.859, |
|
"eval_wer": 0.3726459071415253, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 32.68, |
|
"learning_rate": 4.707207207207208e-05, |
|
"loss": 0.1229, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_loss": 0.2908037304878235, |
|
"eval_runtime": 189.3466, |
|
"eval_samples_per_second": 9.739, |
|
"eval_wer": 0.3699421965317919, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 35.29, |
|
"eval_loss": 0.3009437620639801, |
|
"eval_runtime": 170.798, |
|
"eval_samples_per_second": 10.796, |
|
"eval_wer": 0.3701286593324632, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 35.95, |
|
"learning_rate": 4.1441441441441444e-05, |
|
"loss": 0.1133, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 37.25, |
|
"eval_loss": 0.3189816176891327, |
|
"eval_runtime": 171.2661, |
|
"eval_samples_per_second": 10.767, |
|
"eval_wer": 0.36947603953011376, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 39.22, |
|
"learning_rate": 3.581081081081081e-05, |
|
"loss": 0.11, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 39.22, |
|
"eval_loss": 0.30211004614830017, |
|
"eval_runtime": 190.742, |
|
"eval_samples_per_second": 9.668, |
|
"eval_wer": 0.36267014730561253, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 41.18, |
|
"eval_loss": 0.30648261308670044, |
|
"eval_runtime": 173.8977, |
|
"eval_samples_per_second": 10.604, |
|
"eval_wer": 0.36677232892038036, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 42.48, |
|
"learning_rate": 3.0180180180180183e-05, |
|
"loss": 0.1006, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 43.14, |
|
"eval_loss": 0.30688807368278503, |
|
"eval_runtime": 195.1525, |
|
"eval_samples_per_second": 9.449, |
|
"eval_wer": 0.36826403132575053, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 45.1, |
|
"eval_loss": 0.31185880303382874, |
|
"eval_runtime": 176.2725, |
|
"eval_samples_per_second": 10.461, |
|
"eval_wer": 0.36164460190192055, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 45.75, |
|
"learning_rate": 2.454954954954955e-05, |
|
"loss": 0.0998, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 47.06, |
|
"eval_loss": 0.31436818838119507, |
|
"eval_runtime": 195.8727, |
|
"eval_samples_per_second": 9.414, |
|
"eval_wer": 0.36108521349990674, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"learning_rate": 1.891891891891892e-05, |
|
"loss": 0.0952, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"eval_loss": 0.31186020374298096, |
|
"eval_runtime": 201.6879, |
|
"eval_samples_per_second": 9.143, |
|
"eval_wer": 0.3613649077009137, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 50.98, |
|
"eval_loss": 0.3063485026359558, |
|
"eval_runtime": 177.6307, |
|
"eval_samples_per_second": 10.381, |
|
"eval_wer": 0.35959351109453663, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 52.29, |
|
"learning_rate": 1.3288288288288289e-05, |
|
"loss": 0.0898, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 52.94, |
|
"eval_loss": 0.32244834303855896, |
|
"eval_runtime": 179.3678, |
|
"eval_samples_per_second": 10.281, |
|
"eval_wer": 0.360525825097893, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 54.9, |
|
"eval_loss": 0.32377830147743225, |
|
"eval_runtime": 179.7421, |
|
"eval_samples_per_second": 10.259, |
|
"eval_wer": 0.360525825097893, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 55.56, |
|
"learning_rate": 7.657657657657658e-06, |
|
"loss": 0.0891, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 56.86, |
|
"eval_loss": 0.3199419677257538, |
|
"eval_runtime": 178.805, |
|
"eval_samples_per_second": 10.313, |
|
"eval_wer": 0.3609919820995711, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 58.82, |
|
"learning_rate": 2.0270270270270273e-06, |
|
"loss": 0.0877, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 58.82, |
|
"eval_loss": 0.3155083954334259, |
|
"eval_runtime": 180.9925, |
|
"eval_samples_per_second": 10.188, |
|
"eval_wer": 0.35922058549319413, |
|
"step": 9000 |
|
} |
|
], |
|
"max_steps": 9180, |
|
"num_train_epochs": 60, |
|
"total_flos": 6.238876246016606e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|