{ "best_metric": 0.32769803615398474, "best_model_checkpoint": "./wav2vec2-base-hy/checkpoint-36331", "epoch": 50.0, "eval_steps": 500, "global_step": 38650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.0217593908309937, "learning_rate": 9.928440366972478e-05, "loss": 4.8189, "step": 773 }, { "epoch": 1.0, "eval_cer": 0.9914830021613891, "eval_loss": 3.259536027908325, "eval_runtime": 538.6244, "eval_samples_per_second": 7.948, "eval_steps_per_second": 0.995, "eval_wer": 1.0, "step": 773 }, { "epoch": 2.0, "grad_norm": 6.301283359527588, "learning_rate": 9.725819134993447e-05, "loss": 1.7898, "step": 1546 }, { "epoch": 2.0, "eval_cer": 0.1494773295194324, "eval_loss": 0.5445137023925781, "eval_runtime": 538.4983, "eval_samples_per_second": 7.95, "eval_steps_per_second": 0.995, "eval_wer": 0.7093567953876644, "step": 1546 }, { "epoch": 3.0, "grad_norm": 3.109081983566284, "learning_rate": 9.523197903014417e-05, "loss": 0.6005, "step": 2319 }, { "epoch": 3.0, "eval_cer": 0.11405947435660509, "eval_loss": 0.3848719298839569, "eval_runtime": 538.0999, "eval_samples_per_second": 7.956, "eval_steps_per_second": 0.996, "eval_wer": 0.56861449762777, "step": 2319 }, { "epoch": 4.0, "grad_norm": 10.241255760192871, "learning_rate": 9.320576671035387e-05, "loss": 0.4408, "step": 3092 }, { "epoch": 4.0, "eval_cer": 0.09999839301927574, "eval_loss": 0.33175399899482727, "eval_runtime": 538.012, "eval_samples_per_second": 7.957, "eval_steps_per_second": 0.996, "eval_wer": 0.5128821091826317, "step": 3092 }, { "epoch": 5.0, "grad_norm": 3.3970553874969482, "learning_rate": 9.117955439056357e-05, "loss": 0.3726, "step": 3865 }, { "epoch": 5.0, "eval_cer": 0.09280313682637377, "eval_loss": 0.31485071778297424, "eval_runtime": 538.2419, "eval_samples_per_second": 7.954, "eval_steps_per_second": 0.996, "eval_wer": 0.4864572698336436, "step": 3865 }, { "epoch": 6.0, "grad_norm": 3.6496012210845947, "learning_rate": 8.915334207077327e-05, "loss": 0.324, "step": 4638 }, { "epoch": 6.0, "eval_cer": 0.08554360140450115, "eval_loss": 0.2828996181488037, "eval_runtime": 538.7731, "eval_samples_per_second": 7.946, "eval_steps_per_second": 0.995, "eval_wer": 0.4492823253858627, "step": 4638 }, { "epoch": 7.0, "grad_norm": 1.054186463356018, "learning_rate": 8.712712975098296e-05, "loss": 0.2885, "step": 5411 }, { "epoch": 7.0, "eval_cer": 0.08390448106574962, "eval_loss": 0.2892753779888153, "eval_runtime": 538.8258, "eval_samples_per_second": 7.945, "eval_steps_per_second": 0.995, "eval_wer": 0.4395832082157228, "step": 5411 }, { "epoch": 8.0, "grad_norm": 4.084922790527344, "learning_rate": 8.510091743119266e-05, "loss": 0.256, "step": 6184 }, { "epoch": 8.0, "eval_cer": 0.0834987184328724, "eval_loss": 0.2856770157814026, "eval_runtime": 538.8477, "eval_samples_per_second": 7.945, "eval_steps_per_second": 0.995, "eval_wer": 0.4325566032070146, "step": 6184 }, { "epoch": 9.0, "grad_norm": 2.03792405128479, "learning_rate": 8.307470511140236e-05, "loss": 0.2425, "step": 6957 }, { "epoch": 9.0, "eval_cer": 0.08157034156375294, "eval_loss": 0.2794096767902374, "eval_runtime": 538.7335, "eval_samples_per_second": 7.946, "eval_steps_per_second": 0.995, "eval_wer": 0.4256501111044382, "step": 6957 }, { "epoch": 10.0, "grad_norm": 4.848245143890381, "learning_rate": 8.104849279161207e-05, "loss": 0.2239, "step": 7730 }, { "epoch": 10.0, "eval_cer": 0.07841262444056983, "eval_loss": 0.2803117334842682, "eval_runtime": 539.9556, "eval_samples_per_second": 7.928, "eval_steps_per_second": 0.993, "eval_wer": 0.4096750945889136, "step": 7730 }, { "epoch": 11.0, "grad_norm": 5.475008964538574, "learning_rate": 7.902228047182177e-05, "loss": 0.202, "step": 8503 }, { "epoch": 11.0, "eval_cer": 0.07942100484504688, "eval_loss": 0.2747463583946228, "eval_runtime": 540.606, "eval_samples_per_second": 7.919, "eval_steps_per_second": 0.991, "eval_wer": 0.4147198366464477, "step": 8503 }, { "epoch": 12.0, "grad_norm": 2.023427724838257, "learning_rate": 7.699606815203146e-05, "loss": 0.1912, "step": 9276 }, { "epoch": 12.0, "eval_cer": 0.07573700153466659, "eval_loss": 0.2822825014591217, "eval_runtime": 538.7132, "eval_samples_per_second": 7.947, "eval_steps_per_second": 0.995, "eval_wer": 0.3974536063900066, "step": 9276 }, { "epoch": 13.0, "grad_norm": 4.661593437194824, "learning_rate": 7.496985583224116e-05, "loss": 0.1755, "step": 10049 }, { "epoch": 13.0, "eval_cer": 0.07552005913689065, "eval_loss": 0.2788126468658447, "eval_runtime": 539.2919, "eval_samples_per_second": 7.938, "eval_steps_per_second": 0.994, "eval_wer": 0.39339979580805956, "step": 10049 }, { "epoch": 14.0, "grad_norm": 2.8817713260650635, "learning_rate": 7.294364351245086e-05, "loss": 0.1669, "step": 10822 }, { "epoch": 14.0, "eval_cer": 0.0756767397575066, "eval_loss": 0.3079923391342163, "eval_runtime": 538.3008, "eval_samples_per_second": 7.953, "eval_steps_per_second": 0.996, "eval_wer": 0.39333973935499367, "step": 10822 }, { "epoch": 15.0, "grad_norm": 4.330118656158447, "learning_rate": 7.091743119266056e-05, "loss": 0.1622, "step": 11595 }, { "epoch": 15.0, "eval_cer": 0.07294085507444338, "eval_loss": 0.2876891791820526, "eval_runtime": 539.8362, "eval_samples_per_second": 7.93, "eval_steps_per_second": 0.993, "eval_wer": 0.3854122875502973, "step": 11595 }, { "epoch": 16.0, "grad_norm": 0.5675534009933472, "learning_rate": 6.889121887287026e-05, "loss": 0.1485, "step": 12368 }, { "epoch": 16.0, "eval_cer": 0.07495359843158682, "eval_loss": 0.30401086807250977, "eval_runtime": 540.0181, "eval_samples_per_second": 7.928, "eval_steps_per_second": 0.993, "eval_wer": 0.38766440454026785, "step": 12368 }, { "epoch": 17.0, "grad_norm": 1.4718132019042969, "learning_rate": 6.686500655307996e-05, "loss": 0.1426, "step": 13141 }, { "epoch": 17.0, "eval_cer": 0.07407779393686173, "eval_loss": 0.2863430678844452, "eval_runtime": 539.387, "eval_samples_per_second": 7.937, "eval_steps_per_second": 0.994, "eval_wer": 0.38862530778932197, "step": 13141 }, { "epoch": 18.0, "grad_norm": 4.17996883392334, "learning_rate": 6.483879423328964e-05, "loss": 0.137, "step": 13914 }, { "epoch": 18.0, "eval_cer": 0.07082365797022265, "eval_loss": 0.29217201471328735, "eval_runtime": 540.1379, "eval_samples_per_second": 7.926, "eval_steps_per_second": 0.992, "eval_wer": 0.37601345264548675, "step": 13914 }, { "epoch": 19.0, "grad_norm": 1.8616777658462524, "learning_rate": 6.281258191349934e-05, "loss": 0.1324, "step": 14687 }, { "epoch": 19.0, "eval_cer": 0.07016077842146283, "eval_loss": 0.28696727752685547, "eval_runtime": 539.124, "eval_samples_per_second": 7.941, "eval_steps_per_second": 0.994, "eval_wer": 0.3717794727043421, "step": 14687 }, { "epoch": 20.0, "grad_norm": 2.518717050552368, "learning_rate": 6.078636959370905e-05, "loss": 0.1247, "step": 15460 }, { "epoch": 20.0, "eval_cer": 0.06962645733064432, "eval_loss": 0.2821752727031708, "eval_runtime": 540.8162, "eval_samples_per_second": 7.916, "eval_steps_per_second": 0.991, "eval_wer": 0.3689868476367786, "step": 15460 }, { "epoch": 21.0, "grad_norm": 3.7167491912841797, "learning_rate": 5.876015727391875e-05, "loss": 0.1226, "step": 16233 }, { "epoch": 21.0, "eval_cer": 0.06858191985987128, "eval_loss": 0.28035247325897217, "eval_runtime": 540.4383, "eval_samples_per_second": 7.921, "eval_steps_per_second": 0.992, "eval_wer": 0.3664945048345445, "step": 16233 }, { "epoch": 22.0, "grad_norm": 1.6708425283432007, "learning_rate": 5.673394495412845e-05, "loss": 0.1162, "step": 17006 }, { "epoch": 22.0, "eval_cer": 0.07046208730726275, "eval_loss": 0.3041548728942871, "eval_runtime": 539.3197, "eval_samples_per_second": 7.938, "eval_steps_per_second": 0.994, "eval_wer": 0.37355113806978557, "step": 17006 }, { "epoch": 23.0, "grad_norm": 0.9431765675544739, "learning_rate": 5.4707732634338135e-05, "loss": 0.1116, "step": 17779 }, { "epoch": 23.0, "eval_cer": 0.06816410487156206, "eval_loss": 0.293849378824234, "eval_runtime": 540.8252, "eval_samples_per_second": 7.916, "eval_steps_per_second": 0.991, "eval_wer": 0.3625608071587292, "step": 17779 }, { "epoch": 24.0, "grad_norm": 1.915635347366333, "learning_rate": 5.268152031454784e-05, "loss": 0.1068, "step": 18552 }, { "epoch": 24.0, "eval_cer": 0.06715170701527436, "eval_loss": 0.29729682207107544, "eval_runtime": 540.0658, "eval_samples_per_second": 7.927, "eval_steps_per_second": 0.992, "eval_wer": 0.3608191700198186, "step": 18552 }, { "epoch": 25.0, "grad_norm": 1.3635971546173096, "learning_rate": 5.065530799475754e-05, "loss": 0.1014, "step": 19325 }, { "epoch": 25.0, "eval_cer": 0.06892340326377785, "eval_loss": 0.31454575061798096, "eval_runtime": 538.5303, "eval_samples_per_second": 7.949, "eval_steps_per_second": 0.995, "eval_wer": 0.36304125878325627, "step": 19325 }, { "epoch": 26.0, "grad_norm": 2.388526201248169, "learning_rate": 4.8629095674967235e-05, "loss": 0.0996, "step": 20098 }, { "epoch": 26.0, "eval_cer": 0.06636830391219457, "eval_loss": 0.30341199040412903, "eval_runtime": 543.2486, "eval_samples_per_second": 7.88, "eval_steps_per_second": 0.987, "eval_wer": 0.35634496426641044, "step": 20098 }, { "epoch": 27.0, "grad_norm": 7.999663829803467, "learning_rate": 4.6602883355176935e-05, "loss": 0.096, "step": 20871 }, { "epoch": 27.0, "eval_cer": 0.06646070530383988, "eval_loss": 0.2997344732284546, "eval_runtime": 541.4586, "eval_samples_per_second": 7.906, "eval_steps_per_second": 0.99, "eval_wer": 0.35541408924388923, "step": 20871 }, { "epoch": 28.0, "grad_norm": 3.0532593727111816, "learning_rate": 4.4576671035386635e-05, "loss": 0.0936, "step": 21644 }, { "epoch": 28.0, "eval_cer": 0.0656773022007601, "eval_loss": 0.3077986538410187, "eval_runtime": 541.1099, "eval_samples_per_second": 7.912, "eval_steps_per_second": 0.991, "eval_wer": 0.35099993994354695, "step": 21644 }, { "epoch": 29.0, "grad_norm": 1.7395150661468506, "learning_rate": 4.255045871559633e-05, "loss": 0.091, "step": 22417 }, { "epoch": 29.0, "eval_cer": 0.06572952907429876, "eval_loss": 0.299217164516449, "eval_runtime": 544.6033, "eval_samples_per_second": 7.861, "eval_steps_per_second": 0.984, "eval_wer": 0.34874782295357637, "step": 22417 }, { "epoch": 30.0, "grad_norm": 2.331186532974243, "learning_rate": 4.0524246395806035e-05, "loss": 0.0914, "step": 23190 }, { "epoch": 30.0, "eval_cer": 0.06598664599018135, "eval_loss": 0.31265002489089966, "eval_runtime": 539.5662, "eval_samples_per_second": 7.934, "eval_steps_per_second": 0.993, "eval_wer": 0.3516305327007387, "step": 23190 }, { "epoch": 31.0, "grad_norm": 0.5025594234466553, "learning_rate": 3.849803407601573e-05, "loss": 0.0849, "step": 23963 }, { "epoch": 31.0, "eval_cer": 0.06473721847706437, "eval_loss": 0.3014875054359436, "eval_runtime": 540.1447, "eval_samples_per_second": 7.926, "eval_steps_per_second": 0.992, "eval_wer": 0.34751666566572575, "step": 23963 }, { "epoch": 32.0, "grad_norm": 3.843916416168213, "learning_rate": 3.647182175622543e-05, "loss": 0.0819, "step": 24736 }, { "epoch": 32.0, "eval_cer": 0.06535992350771752, "eval_loss": 0.3202614486217499, "eval_runtime": 539.8225, "eval_samples_per_second": 7.93, "eval_steps_per_second": 0.993, "eval_wer": 0.348927992312774, "step": 24736 }, { "epoch": 33.0, "grad_norm": 3.8599517345428467, "learning_rate": 3.444560943643513e-05, "loss": 0.0806, "step": 25509 }, { "epoch": 33.0, "eval_cer": 0.06497023068208296, "eval_loss": 0.3200363516807556, "eval_runtime": 539.477, "eval_samples_per_second": 7.935, "eval_steps_per_second": 0.994, "eval_wer": 0.3485676535943787, "step": 25509 }, { "epoch": 34.0, "grad_norm": 3.0424208641052246, "learning_rate": 3.241939711664482e-05, "loss": 0.08, "step": 26282 }, { "epoch": 34.0, "eval_cer": 0.06427119406702717, "eval_loss": 0.3235025107860565, "eval_runtime": 540.33, "eval_samples_per_second": 7.923, "eval_steps_per_second": 0.992, "eval_wer": 0.3427722058735211, "step": 26282 }, { "epoch": 35.0, "grad_norm": 1.9409205913543701, "learning_rate": 3.0393184796854524e-05, "loss": 0.0762, "step": 27055 }, { "epoch": 35.0, "eval_cer": 0.06393774556674192, "eval_loss": 0.31750521063804626, "eval_runtime": 540.1387, "eval_samples_per_second": 7.926, "eval_steps_per_second": 0.992, "eval_wer": 0.34442375833283284, "step": 27055 }, { "epoch": 36.0, "grad_norm": 2.8859474658966064, "learning_rate": 2.8366972477064224e-05, "loss": 0.0753, "step": 27828 }, { "epoch": 36.0, "eval_cer": 0.06355207019291803, "eval_loss": 0.3156121075153351, "eval_runtime": 541.5718, "eval_samples_per_second": 7.905, "eval_steps_per_second": 0.99, "eval_wer": 0.34202150021019756, "step": 27828 }, { "epoch": 37.0, "grad_norm": 1.7962828874588013, "learning_rate": 2.634076015727392e-05, "loss": 0.0709, "step": 28601 }, { "epoch": 37.0, "eval_cer": 0.06434752565142982, "eval_loss": 0.31625601649284363, "eval_runtime": 540.5673, "eval_samples_per_second": 7.919, "eval_steps_per_second": 0.992, "eval_wer": 0.3438832502552399, "step": 28601 }, { "epoch": 38.0, "grad_norm": 0.47632133960723877, "learning_rate": 2.4314547837483617e-05, "loss": 0.0709, "step": 29374 }, { "epoch": 38.0, "eval_cer": 0.06292534771045422, "eval_loss": 0.3177834451198578, "eval_runtime": 542.3771, "eval_samples_per_second": 7.893, "eval_steps_per_second": 0.988, "eval_wer": 0.33988949612635877, "step": 29374 }, { "epoch": 39.0, "grad_norm": 1.1476659774780273, "learning_rate": 2.2288335517693317e-05, "loss": 0.0674, "step": 30147 }, { "epoch": 39.0, "eval_cer": 0.06325879621073945, "eval_loss": 0.31504514813423157, "eval_runtime": 539.8002, "eval_samples_per_second": 7.931, "eval_steps_per_second": 0.993, "eval_wer": 0.34009969371208937, "step": 30147 }, { "epoch": 40.0, "grad_norm": 3.619795560836792, "learning_rate": 2.0262123197903017e-05, "loss": 0.0679, "step": 30920 }, { "epoch": 40.0, "eval_cer": 0.06262002137284363, "eval_loss": 0.3333224356174469, "eval_runtime": 540.4897, "eval_samples_per_second": 7.921, "eval_steps_per_second": 0.992, "eval_wer": 0.3378776049486517, "step": 30920 }, { "epoch": 41.0, "grad_norm": 5.51519775390625, "learning_rate": 1.8235910878112714e-05, "loss": 0.0636, "step": 31693 }, { "epoch": 41.0, "eval_cer": 0.06255975959568365, "eval_loss": 0.32877811789512634, "eval_runtime": 541.1158, "eval_samples_per_second": 7.911, "eval_steps_per_second": 0.991, "eval_wer": 0.3371869557383941, "step": 31693 }, { "epoch": 42.0, "grad_norm": 3.61895751953125, "learning_rate": 1.620969855832241e-05, "loss": 0.0614, "step": 32466 }, { "epoch": 42.0, "eval_cer": 0.062358887005150375, "eval_loss": 0.3240591883659363, "eval_runtime": 540.0535, "eval_samples_per_second": 7.927, "eval_steps_per_second": 0.992, "eval_wer": 0.33619602426280704, "step": 32466 }, { "epoch": 43.0, "grad_norm": 1.1857914924621582, "learning_rate": 1.4183486238532112e-05, "loss": 0.0629, "step": 33239 }, { "epoch": 43.0, "eval_cer": 0.06161967587198792, "eval_loss": 0.3232579231262207, "eval_runtime": 540.6476, "eval_samples_per_second": 7.918, "eval_steps_per_second": 0.991, "eval_wer": 0.3335235121013753, "step": 33239 }, { "epoch": 44.0, "grad_norm": 0.7397142052650452, "learning_rate": 1.2157273918741809e-05, "loss": 0.061, "step": 34012 }, { "epoch": 44.0, "eval_cer": 0.06244325349317435, "eval_loss": 0.32336390018463135, "eval_runtime": 539.148, "eval_samples_per_second": 7.94, "eval_steps_per_second": 0.994, "eval_wer": 0.3349648669749565, "step": 34012 }, { "epoch": 45.0, "grad_norm": 2.209028482437134, "learning_rate": 1.0131061598951509e-05, "loss": 0.0604, "step": 34785 }, { "epoch": 45.0, "eval_cer": 0.061691990004579894, "eval_loss": 0.32257187366485596, "eval_runtime": 539.0839, "eval_samples_per_second": 7.941, "eval_steps_per_second": 0.994, "eval_wer": 0.33421416131163295, "step": 34785 }, { "epoch": 46.0, "grad_norm": 0.40143582224845886, "learning_rate": 8.104849279161205e-06, "loss": 0.0582, "step": 35558 }, { "epoch": 46.0, "eval_cer": 0.06054299878672955, "eval_loss": 0.326095849275589, "eval_runtime": 538.3445, "eval_samples_per_second": 7.952, "eval_steps_per_second": 0.996, "eval_wer": 0.3291994474806318, "step": 35558 }, { "epoch": 47.0, "grad_norm": 2.2391343116760254, "learning_rate": 6.078636959370904e-06, "loss": 0.0575, "step": 36331 }, { "epoch": 47.0, "eval_cer": 0.060346143648006945, "eval_loss": 0.31861406564712524, "eval_runtime": 541.1135, "eval_samples_per_second": 7.911, "eval_steps_per_second": 0.991, "eval_wer": 0.32769803615398474, "step": 36331 }, { "epoch": 48.0, "grad_norm": 2.6581954956054688, "learning_rate": 4.052424639580603e-06, "loss": 0.0553, "step": 37104 }, { "epoch": 48.0, "eval_cer": 0.060695661955534844, "eval_loss": 0.32371774315834045, "eval_runtime": 539.7027, "eval_samples_per_second": 7.932, "eval_steps_per_second": 0.993, "eval_wer": 0.3286889676295718, "step": 37104 }, { "epoch": 49.0, "grad_norm": 1.2842614650726318, "learning_rate": 2.0262123197903013e-06, "loss": 0.0554, "step": 37877 }, { "epoch": 49.0, "eval_cer": 0.060611295467510866, "eval_loss": 0.3244434893131256, "eval_runtime": 539.5278, "eval_samples_per_second": 7.935, "eval_steps_per_second": 0.993, "eval_wer": 0.3289291934418353, "step": 37877 }, { "epoch": 50.0, "grad_norm": 1.3330661058425903, "learning_rate": 0.0, "loss": 0.0543, "step": 38650 }, { "epoch": 50.0, "eval_cer": 0.0604787195577589, "eval_loss": 0.32520928978919983, "eval_runtime": 540.1631, "eval_samples_per_second": 7.925, "eval_steps_per_second": 0.992, "eval_wer": 0.32829860068464356, "step": 38650 } ], "logging_steps": 500, "max_steps": 38650, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 4.930233517238343e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }