|
{ |
|
"best_metric": 0.32769803615398474, |
|
"best_model_checkpoint": "./wav2vec2-base-hy/checkpoint-36331", |
|
"epoch": 50.0, |
|
"eval_steps": 500, |
|
"global_step": 38650, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.0217593908309937, |
|
"learning_rate": 9.928440366972478e-05, |
|
"loss": 4.8189, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 0.9914830021613891, |
|
"eval_loss": 3.259536027908325, |
|
"eval_runtime": 538.6244, |
|
"eval_samples_per_second": 7.948, |
|
"eval_steps_per_second": 0.995, |
|
"eval_wer": 1.0, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 6.301283359527588, |
|
"learning_rate": 9.725819134993447e-05, |
|
"loss": 1.7898, |
|
"step": 1546 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 0.1494773295194324, |
|
"eval_loss": 0.5445137023925781, |
|
"eval_runtime": 538.4983, |
|
"eval_samples_per_second": 7.95, |
|
"eval_steps_per_second": 0.995, |
|
"eval_wer": 0.7093567953876644, |
|
"step": 1546 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.109081983566284, |
|
"learning_rate": 9.523197903014417e-05, |
|
"loss": 0.6005, |
|
"step": 2319 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 0.11405947435660509, |
|
"eval_loss": 0.3848719298839569, |
|
"eval_runtime": 538.0999, |
|
"eval_samples_per_second": 7.956, |
|
"eval_steps_per_second": 0.996, |
|
"eval_wer": 0.56861449762777, |
|
"step": 2319 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 10.241255760192871, |
|
"learning_rate": 9.320576671035387e-05, |
|
"loss": 0.4408, |
|
"step": 3092 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 0.09999839301927574, |
|
"eval_loss": 0.33175399899482727, |
|
"eval_runtime": 538.012, |
|
"eval_samples_per_second": 7.957, |
|
"eval_steps_per_second": 0.996, |
|
"eval_wer": 0.5128821091826317, |
|
"step": 3092 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.3970553874969482, |
|
"learning_rate": 9.117955439056357e-05, |
|
"loss": 0.3726, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 0.09280313682637377, |
|
"eval_loss": 0.31485071778297424, |
|
"eval_runtime": 538.2419, |
|
"eval_samples_per_second": 7.954, |
|
"eval_steps_per_second": 0.996, |
|
"eval_wer": 0.4864572698336436, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.6496012210845947, |
|
"learning_rate": 8.915334207077327e-05, |
|
"loss": 0.324, |
|
"step": 4638 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 0.08554360140450115, |
|
"eval_loss": 0.2828996181488037, |
|
"eval_runtime": 538.7731, |
|
"eval_samples_per_second": 7.946, |
|
"eval_steps_per_second": 0.995, |
|
"eval_wer": 0.4492823253858627, |
|
"step": 4638 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.054186463356018, |
|
"learning_rate": 8.712712975098296e-05, |
|
"loss": 0.2885, |
|
"step": 5411 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 0.08390448106574962, |
|
"eval_loss": 0.2892753779888153, |
|
"eval_runtime": 538.8258, |
|
"eval_samples_per_second": 7.945, |
|
"eval_steps_per_second": 0.995, |
|
"eval_wer": 0.4395832082157228, |
|
"step": 5411 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.084922790527344, |
|
"learning_rate": 8.510091743119266e-05, |
|
"loss": 0.256, |
|
"step": 6184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 0.0834987184328724, |
|
"eval_loss": 0.2856770157814026, |
|
"eval_runtime": 538.8477, |
|
"eval_samples_per_second": 7.945, |
|
"eval_steps_per_second": 0.995, |
|
"eval_wer": 0.4325566032070146, |
|
"step": 6184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 2.03792405128479, |
|
"learning_rate": 8.307470511140236e-05, |
|
"loss": 0.2425, |
|
"step": 6957 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 0.08157034156375294, |
|
"eval_loss": 0.2794096767902374, |
|
"eval_runtime": 538.7335, |
|
"eval_samples_per_second": 7.946, |
|
"eval_steps_per_second": 0.995, |
|
"eval_wer": 0.4256501111044382, |
|
"step": 6957 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.848245143890381, |
|
"learning_rate": 8.104849279161207e-05, |
|
"loss": 0.2239, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 0.07841262444056983, |
|
"eval_loss": 0.2803117334842682, |
|
"eval_runtime": 539.9556, |
|
"eval_samples_per_second": 7.928, |
|
"eval_steps_per_second": 0.993, |
|
"eval_wer": 0.4096750945889136, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 5.475008964538574, |
|
"learning_rate": 7.902228047182177e-05, |
|
"loss": 0.202, |
|
"step": 8503 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 0.07942100484504688, |
|
"eval_loss": 0.2747463583946228, |
|
"eval_runtime": 540.606, |
|
"eval_samples_per_second": 7.919, |
|
"eval_steps_per_second": 0.991, |
|
"eval_wer": 0.4147198366464477, |
|
"step": 8503 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 2.023427724838257, |
|
"learning_rate": 7.699606815203146e-05, |
|
"loss": 0.1912, |
|
"step": 9276 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 0.07573700153466659, |
|
"eval_loss": 0.2822825014591217, |
|
"eval_runtime": 538.7132, |
|
"eval_samples_per_second": 7.947, |
|
"eval_steps_per_second": 0.995, |
|
"eval_wer": 0.3974536063900066, |
|
"step": 9276 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.661593437194824, |
|
"learning_rate": 7.496985583224116e-05, |
|
"loss": 0.1755, |
|
"step": 10049 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 0.07552005913689065, |
|
"eval_loss": 0.2788126468658447, |
|
"eval_runtime": 539.2919, |
|
"eval_samples_per_second": 7.938, |
|
"eval_steps_per_second": 0.994, |
|
"eval_wer": 0.39339979580805956, |
|
"step": 10049 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 2.8817713260650635, |
|
"learning_rate": 7.294364351245086e-05, |
|
"loss": 0.1669, |
|
"step": 10822 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 0.0756767397575066, |
|
"eval_loss": 0.3079923391342163, |
|
"eval_runtime": 538.3008, |
|
"eval_samples_per_second": 7.953, |
|
"eval_steps_per_second": 0.996, |
|
"eval_wer": 0.39333973935499367, |
|
"step": 10822 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 4.330118656158447, |
|
"learning_rate": 7.091743119266056e-05, |
|
"loss": 0.1622, |
|
"step": 11595 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 0.07294085507444338, |
|
"eval_loss": 0.2876891791820526, |
|
"eval_runtime": 539.8362, |
|
"eval_samples_per_second": 7.93, |
|
"eval_steps_per_second": 0.993, |
|
"eval_wer": 0.3854122875502973, |
|
"step": 11595 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.5675534009933472, |
|
"learning_rate": 6.889121887287026e-05, |
|
"loss": 0.1485, |
|
"step": 12368 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_cer": 0.07495359843158682, |
|
"eval_loss": 0.30401086807250977, |
|
"eval_runtime": 540.0181, |
|
"eval_samples_per_second": 7.928, |
|
"eval_steps_per_second": 0.993, |
|
"eval_wer": 0.38766440454026785, |
|
"step": 12368 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.4718132019042969, |
|
"learning_rate": 6.686500655307996e-05, |
|
"loss": 0.1426, |
|
"step": 13141 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_cer": 0.07407779393686173, |
|
"eval_loss": 0.2863430678844452, |
|
"eval_runtime": 539.387, |
|
"eval_samples_per_second": 7.937, |
|
"eval_steps_per_second": 0.994, |
|
"eval_wer": 0.38862530778932197, |
|
"step": 13141 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.17996883392334, |
|
"learning_rate": 6.483879423328964e-05, |
|
"loss": 0.137, |
|
"step": 13914 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_cer": 0.07082365797022265, |
|
"eval_loss": 0.29217201471328735, |
|
"eval_runtime": 540.1379, |
|
"eval_samples_per_second": 7.926, |
|
"eval_steps_per_second": 0.992, |
|
"eval_wer": 0.37601345264548675, |
|
"step": 13914 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 1.8616777658462524, |
|
"learning_rate": 6.281258191349934e-05, |
|
"loss": 0.1324, |
|
"step": 14687 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_cer": 0.07016077842146283, |
|
"eval_loss": 0.28696727752685547, |
|
"eval_runtime": 539.124, |
|
"eval_samples_per_second": 7.941, |
|
"eval_steps_per_second": 0.994, |
|
"eval_wer": 0.3717794727043421, |
|
"step": 14687 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.518717050552368, |
|
"learning_rate": 6.078636959370905e-05, |
|
"loss": 0.1247, |
|
"step": 15460 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_cer": 0.06962645733064432, |
|
"eval_loss": 0.2821752727031708, |
|
"eval_runtime": 540.8162, |
|
"eval_samples_per_second": 7.916, |
|
"eval_steps_per_second": 0.991, |
|
"eval_wer": 0.3689868476367786, |
|
"step": 15460 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 3.7167491912841797, |
|
"learning_rate": 5.876015727391875e-05, |
|
"loss": 0.1226, |
|
"step": 16233 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_cer": 0.06858191985987128, |
|
"eval_loss": 0.28035247325897217, |
|
"eval_runtime": 540.4383, |
|
"eval_samples_per_second": 7.921, |
|
"eval_steps_per_second": 0.992, |
|
"eval_wer": 0.3664945048345445, |
|
"step": 16233 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 1.6708425283432007, |
|
"learning_rate": 5.673394495412845e-05, |
|
"loss": 0.1162, |
|
"step": 17006 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_cer": 0.07046208730726275, |
|
"eval_loss": 0.3041548728942871, |
|
"eval_runtime": 539.3197, |
|
"eval_samples_per_second": 7.938, |
|
"eval_steps_per_second": 0.994, |
|
"eval_wer": 0.37355113806978557, |
|
"step": 17006 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 0.9431765675544739, |
|
"learning_rate": 5.4707732634338135e-05, |
|
"loss": 0.1116, |
|
"step": 17779 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_cer": 0.06816410487156206, |
|
"eval_loss": 0.293849378824234, |
|
"eval_runtime": 540.8252, |
|
"eval_samples_per_second": 7.916, |
|
"eval_steps_per_second": 0.991, |
|
"eval_wer": 0.3625608071587292, |
|
"step": 17779 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 1.915635347366333, |
|
"learning_rate": 5.268152031454784e-05, |
|
"loss": 0.1068, |
|
"step": 18552 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_cer": 0.06715170701527436, |
|
"eval_loss": 0.29729682207107544, |
|
"eval_runtime": 540.0658, |
|
"eval_samples_per_second": 7.927, |
|
"eval_steps_per_second": 0.992, |
|
"eval_wer": 0.3608191700198186, |
|
"step": 18552 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 1.3635971546173096, |
|
"learning_rate": 5.065530799475754e-05, |
|
"loss": 0.1014, |
|
"step": 19325 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 0.06892340326377785, |
|
"eval_loss": 0.31454575061798096, |
|
"eval_runtime": 538.5303, |
|
"eval_samples_per_second": 7.949, |
|
"eval_steps_per_second": 0.995, |
|
"eval_wer": 0.36304125878325627, |
|
"step": 19325 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 2.388526201248169, |
|
"learning_rate": 4.8629095674967235e-05, |
|
"loss": 0.0996, |
|
"step": 20098 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_cer": 0.06636830391219457, |
|
"eval_loss": 0.30341199040412903, |
|
"eval_runtime": 543.2486, |
|
"eval_samples_per_second": 7.88, |
|
"eval_steps_per_second": 0.987, |
|
"eval_wer": 0.35634496426641044, |
|
"step": 20098 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"grad_norm": 7.999663829803467, |
|
"learning_rate": 4.6602883355176935e-05, |
|
"loss": 0.096, |
|
"step": 20871 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_cer": 0.06646070530383988, |
|
"eval_loss": 0.2997344732284546, |
|
"eval_runtime": 541.4586, |
|
"eval_samples_per_second": 7.906, |
|
"eval_steps_per_second": 0.99, |
|
"eval_wer": 0.35541408924388923, |
|
"step": 20871 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 3.0532593727111816, |
|
"learning_rate": 4.4576671035386635e-05, |
|
"loss": 0.0936, |
|
"step": 21644 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_cer": 0.0656773022007601, |
|
"eval_loss": 0.3077986538410187, |
|
"eval_runtime": 541.1099, |
|
"eval_samples_per_second": 7.912, |
|
"eval_steps_per_second": 0.991, |
|
"eval_wer": 0.35099993994354695, |
|
"step": 21644 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"grad_norm": 1.7395150661468506, |
|
"learning_rate": 4.255045871559633e-05, |
|
"loss": 0.091, |
|
"step": 22417 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_cer": 0.06572952907429876, |
|
"eval_loss": 0.299217164516449, |
|
"eval_runtime": 544.6033, |
|
"eval_samples_per_second": 7.861, |
|
"eval_steps_per_second": 0.984, |
|
"eval_wer": 0.34874782295357637, |
|
"step": 22417 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 2.331186532974243, |
|
"learning_rate": 4.0524246395806035e-05, |
|
"loss": 0.0914, |
|
"step": 23190 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_cer": 0.06598664599018135, |
|
"eval_loss": 0.31265002489089966, |
|
"eval_runtime": 539.5662, |
|
"eval_samples_per_second": 7.934, |
|
"eval_steps_per_second": 0.993, |
|
"eval_wer": 0.3516305327007387, |
|
"step": 23190 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"grad_norm": 0.5025594234466553, |
|
"learning_rate": 3.849803407601573e-05, |
|
"loss": 0.0849, |
|
"step": 23963 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_cer": 0.06473721847706437, |
|
"eval_loss": 0.3014875054359436, |
|
"eval_runtime": 540.1447, |
|
"eval_samples_per_second": 7.926, |
|
"eval_steps_per_second": 0.992, |
|
"eval_wer": 0.34751666566572575, |
|
"step": 23963 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 3.843916416168213, |
|
"learning_rate": 3.647182175622543e-05, |
|
"loss": 0.0819, |
|
"step": 24736 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_cer": 0.06535992350771752, |
|
"eval_loss": 0.3202614486217499, |
|
"eval_runtime": 539.8225, |
|
"eval_samples_per_second": 7.93, |
|
"eval_steps_per_second": 0.993, |
|
"eval_wer": 0.348927992312774, |
|
"step": 24736 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"grad_norm": 3.8599517345428467, |
|
"learning_rate": 3.444560943643513e-05, |
|
"loss": 0.0806, |
|
"step": 25509 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_cer": 0.06497023068208296, |
|
"eval_loss": 0.3200363516807556, |
|
"eval_runtime": 539.477, |
|
"eval_samples_per_second": 7.935, |
|
"eval_steps_per_second": 0.994, |
|
"eval_wer": 0.3485676535943787, |
|
"step": 25509 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"grad_norm": 3.0424208641052246, |
|
"learning_rate": 3.241939711664482e-05, |
|
"loss": 0.08, |
|
"step": 26282 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_cer": 0.06427119406702717, |
|
"eval_loss": 0.3235025107860565, |
|
"eval_runtime": 540.33, |
|
"eval_samples_per_second": 7.923, |
|
"eval_steps_per_second": 0.992, |
|
"eval_wer": 0.3427722058735211, |
|
"step": 26282 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"grad_norm": 1.9409205913543701, |
|
"learning_rate": 3.0393184796854524e-05, |
|
"loss": 0.0762, |
|
"step": 27055 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_cer": 0.06393774556674192, |
|
"eval_loss": 0.31750521063804626, |
|
"eval_runtime": 540.1387, |
|
"eval_samples_per_second": 7.926, |
|
"eval_steps_per_second": 0.992, |
|
"eval_wer": 0.34442375833283284, |
|
"step": 27055 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 2.8859474658966064, |
|
"learning_rate": 2.8366972477064224e-05, |
|
"loss": 0.0753, |
|
"step": 27828 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_cer": 0.06355207019291803, |
|
"eval_loss": 0.3156121075153351, |
|
"eval_runtime": 541.5718, |
|
"eval_samples_per_second": 7.905, |
|
"eval_steps_per_second": 0.99, |
|
"eval_wer": 0.34202150021019756, |
|
"step": 27828 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"grad_norm": 1.7962828874588013, |
|
"learning_rate": 2.634076015727392e-05, |
|
"loss": 0.0709, |
|
"step": 28601 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_cer": 0.06434752565142982, |
|
"eval_loss": 0.31625601649284363, |
|
"eval_runtime": 540.5673, |
|
"eval_samples_per_second": 7.919, |
|
"eval_steps_per_second": 0.992, |
|
"eval_wer": 0.3438832502552399, |
|
"step": 28601 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"grad_norm": 0.47632133960723877, |
|
"learning_rate": 2.4314547837483617e-05, |
|
"loss": 0.0709, |
|
"step": 29374 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_cer": 0.06292534771045422, |
|
"eval_loss": 0.3177834451198578, |
|
"eval_runtime": 542.3771, |
|
"eval_samples_per_second": 7.893, |
|
"eval_steps_per_second": 0.988, |
|
"eval_wer": 0.33988949612635877, |
|
"step": 29374 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"grad_norm": 1.1476659774780273, |
|
"learning_rate": 2.2288335517693317e-05, |
|
"loss": 0.0674, |
|
"step": 30147 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_cer": 0.06325879621073945, |
|
"eval_loss": 0.31504514813423157, |
|
"eval_runtime": 539.8002, |
|
"eval_samples_per_second": 7.931, |
|
"eval_steps_per_second": 0.993, |
|
"eval_wer": 0.34009969371208937, |
|
"step": 30147 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 3.619795560836792, |
|
"learning_rate": 2.0262123197903017e-05, |
|
"loss": 0.0679, |
|
"step": 30920 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_cer": 0.06262002137284363, |
|
"eval_loss": 0.3333224356174469, |
|
"eval_runtime": 540.4897, |
|
"eval_samples_per_second": 7.921, |
|
"eval_steps_per_second": 0.992, |
|
"eval_wer": 0.3378776049486517, |
|
"step": 30920 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"grad_norm": 5.51519775390625, |
|
"learning_rate": 1.8235910878112714e-05, |
|
"loss": 0.0636, |
|
"step": 31693 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_cer": 0.06255975959568365, |
|
"eval_loss": 0.32877811789512634, |
|
"eval_runtime": 541.1158, |
|
"eval_samples_per_second": 7.911, |
|
"eval_steps_per_second": 0.991, |
|
"eval_wer": 0.3371869557383941, |
|
"step": 31693 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"grad_norm": 3.61895751953125, |
|
"learning_rate": 1.620969855832241e-05, |
|
"loss": 0.0614, |
|
"step": 32466 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_cer": 0.062358887005150375, |
|
"eval_loss": 0.3240591883659363, |
|
"eval_runtime": 540.0535, |
|
"eval_samples_per_second": 7.927, |
|
"eval_steps_per_second": 0.992, |
|
"eval_wer": 0.33619602426280704, |
|
"step": 32466 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"grad_norm": 1.1857914924621582, |
|
"learning_rate": 1.4183486238532112e-05, |
|
"loss": 0.0629, |
|
"step": 33239 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_cer": 0.06161967587198792, |
|
"eval_loss": 0.3232579231262207, |
|
"eval_runtime": 540.6476, |
|
"eval_samples_per_second": 7.918, |
|
"eval_steps_per_second": 0.991, |
|
"eval_wer": 0.3335235121013753, |
|
"step": 33239 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"grad_norm": 0.7397142052650452, |
|
"learning_rate": 1.2157273918741809e-05, |
|
"loss": 0.061, |
|
"step": 34012 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_cer": 0.06244325349317435, |
|
"eval_loss": 0.32336390018463135, |
|
"eval_runtime": 539.148, |
|
"eval_samples_per_second": 7.94, |
|
"eval_steps_per_second": 0.994, |
|
"eval_wer": 0.3349648669749565, |
|
"step": 34012 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"grad_norm": 2.209028482437134, |
|
"learning_rate": 1.0131061598951509e-05, |
|
"loss": 0.0604, |
|
"step": 34785 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_cer": 0.061691990004579894, |
|
"eval_loss": 0.32257187366485596, |
|
"eval_runtime": 539.0839, |
|
"eval_samples_per_second": 7.941, |
|
"eval_steps_per_second": 0.994, |
|
"eval_wer": 0.33421416131163295, |
|
"step": 34785 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"grad_norm": 0.40143582224845886, |
|
"learning_rate": 8.104849279161205e-06, |
|
"loss": 0.0582, |
|
"step": 35558 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_cer": 0.06054299878672955, |
|
"eval_loss": 0.326095849275589, |
|
"eval_runtime": 538.3445, |
|
"eval_samples_per_second": 7.952, |
|
"eval_steps_per_second": 0.996, |
|
"eval_wer": 0.3291994474806318, |
|
"step": 35558 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"grad_norm": 2.2391343116760254, |
|
"learning_rate": 6.078636959370904e-06, |
|
"loss": 0.0575, |
|
"step": 36331 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_cer": 0.060346143648006945, |
|
"eval_loss": 0.31861406564712524, |
|
"eval_runtime": 541.1135, |
|
"eval_samples_per_second": 7.911, |
|
"eval_steps_per_second": 0.991, |
|
"eval_wer": 0.32769803615398474, |
|
"step": 36331 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"grad_norm": 2.6581954956054688, |
|
"learning_rate": 4.052424639580603e-06, |
|
"loss": 0.0553, |
|
"step": 37104 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_cer": 0.060695661955534844, |
|
"eval_loss": 0.32371774315834045, |
|
"eval_runtime": 539.7027, |
|
"eval_samples_per_second": 7.932, |
|
"eval_steps_per_second": 0.993, |
|
"eval_wer": 0.3286889676295718, |
|
"step": 37104 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"grad_norm": 1.2842614650726318, |
|
"learning_rate": 2.0262123197903013e-06, |
|
"loss": 0.0554, |
|
"step": 37877 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_cer": 0.060611295467510866, |
|
"eval_loss": 0.3244434893131256, |
|
"eval_runtime": 539.5278, |
|
"eval_samples_per_second": 7.935, |
|
"eval_steps_per_second": 0.993, |
|
"eval_wer": 0.3289291934418353, |
|
"step": 37877 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 1.3330661058425903, |
|
"learning_rate": 0.0, |
|
"loss": 0.0543, |
|
"step": 38650 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_cer": 0.0604787195577589, |
|
"eval_loss": 0.32520928978919983, |
|
"eval_runtime": 540.1631, |
|
"eval_samples_per_second": 7.925, |
|
"eval_steps_per_second": 0.992, |
|
"eval_wer": 0.32829860068464356, |
|
"step": 38650 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 38650, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 4.930233517238343e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|