{ "best_metric": null, "best_model_checkpoint": null, "epoch": 31.99304865938431, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.79, "learning_rate": 0.0001, "loss": 0.0392, "step": 100 }, { "epoch": 0.79, "eval_loss": 0.23765751719474792, "eval_runtime": 220.5891, "eval_samples_per_second": 16.297, "eval_steps_per_second": 2.04, "eval_wer": 0.21887876816308827, "step": 100 }, { "epoch": 1.6, "learning_rate": 9.991993594875901e-05, "loss": 0.0336, "step": 200 }, { "epoch": 1.6, "eval_loss": 0.26165521144866943, "eval_runtime": 157.9726, "eval_samples_per_second": 22.757, "eval_steps_per_second": 2.849, "eval_wer": 0.21649316851008457, "step": 200 }, { "epoch": 2.4, "learning_rate": 9.983987189751803e-05, "loss": 0.0293, "step": 300 }, { "epoch": 2.4, "eval_loss": 0.28323182463645935, "eval_runtime": 169.3448, "eval_samples_per_second": 21.229, "eval_steps_per_second": 2.657, "eval_wer": 0.2197462589459987, "step": 300 }, { "epoch": 3.2, "learning_rate": 9.975980784627703e-05, "loss": 0.0283, "step": 400 }, { "epoch": 3.2, "eval_loss": 0.29311421513557434, "eval_runtime": 175.6178, "eval_samples_per_second": 20.471, "eval_steps_per_second": 2.562, "eval_wer": 0.21508349598785512, "step": 400 }, { "epoch": 3.99, "learning_rate": 9.967974379503603e-05, "loss": 0.0274, "step": 500 }, { "epoch": 3.99, "eval_loss": 0.30566948652267456, "eval_runtime": 182.1945, "eval_samples_per_second": 19.732, "eval_steps_per_second": 2.47, "eval_wer": 0.21828236824983735, "step": 500 }, { "epoch": 4.79, "learning_rate": 9.959967974379504e-05, "loss": 0.0264, "step": 600 }, { "epoch": 4.79, "eval_loss": 0.30197781324386597, "eval_runtime": 174.7969, "eval_samples_per_second": 20.567, "eval_steps_per_second": 2.574, "eval_wer": 0.2171437865972674, "step": 600 }, { "epoch": 5.6, "learning_rate": 9.951961569255405e-05, "loss": 0.0259, "step": 700 }, { "epoch": 5.6, "eval_loss": 0.3002428412437439, "eval_runtime": 191.2838, "eval_samples_per_second": 18.794, "eval_steps_per_second": 2.353, "eval_wer": 0.21725222294513122, "step": 700 }, { "epoch": 6.4, "learning_rate": 9.943955164131305e-05, "loss": 0.0254, "step": 800 }, { "epoch": 6.4, "eval_loss": 0.3097267746925354, "eval_runtime": 183.9687, "eval_samples_per_second": 19.541, "eval_steps_per_second": 2.446, "eval_wer": 0.21752331381479073, "step": 800 }, { "epoch": 7.2, "learning_rate": 9.936028823058447e-05, "loss": 0.0252, "step": 900 }, { "epoch": 7.2, "eval_loss": 0.2970833480358124, "eval_runtime": 182.1563, "eval_samples_per_second": 19.736, "eval_steps_per_second": 2.47, "eval_wer": 0.2170353502494036, "step": 900 }, { "epoch": 7.99, "learning_rate": 9.928022417934349e-05, "loss": 0.0234, "step": 1000 }, { "epoch": 7.99, "eval_loss": 0.31429246068000793, "eval_runtime": 193.7999, "eval_samples_per_second": 18.55, "eval_steps_per_second": 2.322, "eval_wer": 0.21405335068314899, "step": 1000 }, { "epoch": 8.79, "learning_rate": 9.920016012810248e-05, "loss": 0.0228, "step": 1100 }, { "epoch": 8.79, "eval_loss": 0.32985326647758484, "eval_runtime": 194.3544, "eval_samples_per_second": 18.497, "eval_steps_per_second": 2.315, "eval_wer": 0.2142702233788766, "step": 1100 }, { "epoch": 9.6, "learning_rate": 9.912009607686149e-05, "loss": 0.0236, "step": 1200 }, { "epoch": 9.6, "eval_loss": 0.3165593147277832, "eval_runtime": 185.0001, "eval_samples_per_second": 19.432, "eval_steps_per_second": 2.432, "eval_wer": 0.21833658642376924, "step": 1200 }, { "epoch": 10.4, "learning_rate": 9.90400320256205e-05, "loss": 0.0241, "step": 1300 }, { "epoch": 10.4, "eval_loss": 0.3285478949546814, "eval_runtime": 181.9532, "eval_samples_per_second": 19.758, "eval_steps_per_second": 2.473, "eval_wer": 0.2192582953806116, "step": 1300 }, { "epoch": 11.2, "learning_rate": 9.895996797437951e-05, "loss": 0.0243, "step": 1400 }, { "epoch": 11.2, "eval_loss": 0.3187803626060486, "eval_runtime": 200.4409, "eval_samples_per_second": 17.935, "eval_steps_per_second": 2.245, "eval_wer": 0.22099327694643245, "step": 1400 }, { "epoch": 11.99, "learning_rate": 9.887990392313852e-05, "loss": 0.026, "step": 1500 }, { "epoch": 11.99, "eval_loss": 0.32988375425338745, "eval_runtime": 199.5224, "eval_samples_per_second": 18.018, "eval_steps_per_second": 2.255, "eval_wer": 0.22375840381695944, "step": 1500 }, { "epoch": 12.79, "learning_rate": 9.879983987189752e-05, "loss": 0.0259, "step": 1600 }, { "epoch": 12.79, "eval_loss": 0.3099309206008911, "eval_runtime": 198.0592, "eval_samples_per_second": 18.151, "eval_steps_per_second": 2.272, "eval_wer": 0.22045109520711342, "step": 1600 }, { "epoch": 13.6, "learning_rate": 9.871977582065654e-05, "loss": 0.0255, "step": 1700 }, { "epoch": 13.6, "eval_loss": 0.30543622374534607, "eval_runtime": 202.823, "eval_samples_per_second": 17.725, "eval_steps_per_second": 2.219, "eval_wer": 0.22402949468661895, "step": 1700 }, { "epoch": 14.4, "learning_rate": 9.863971176941553e-05, "loss": 0.0253, "step": 1800 }, { "epoch": 14.4, "eval_loss": 0.31324318051338196, "eval_runtime": 199.664, "eval_samples_per_second": 18.005, "eval_steps_per_second": 2.254, "eval_wer": 0.21779440468445022, "step": 1800 }, { "epoch": 15.2, "learning_rate": 9.855964771817454e-05, "loss": 0.0244, "step": 1900 }, { "epoch": 15.2, "eval_loss": 0.33187857270240784, "eval_runtime": 197.0859, "eval_samples_per_second": 18.241, "eval_steps_per_second": 2.283, "eval_wer": 0.22121014964216004, "step": 1900 }, { "epoch": 15.99, "learning_rate": 9.847958366693355e-05, "loss": 0.0231, "step": 2000 }, { "epoch": 15.99, "eval_loss": 0.33831512928009033, "eval_runtime": 200.252, "eval_samples_per_second": 17.952, "eval_steps_per_second": 2.247, "eval_wer": 0.21920407720667967, "step": 2000 }, { "epoch": 16.79, "learning_rate": 9.839951961569256e-05, "loss": 0.0235, "step": 2100 }, { "epoch": 16.79, "eval_loss": 0.31389620900154114, "eval_runtime": 198.2191, "eval_samples_per_second": 18.136, "eval_steps_per_second": 2.27, "eval_wer": 0.21622207764042506, "step": 2100 }, { "epoch": 17.6, "learning_rate": 9.831945556445156e-05, "loss": 0.0227, "step": 2200 }, { "epoch": 17.6, "eval_loss": 0.32037118077278137, "eval_runtime": 202.9198, "eval_samples_per_second": 17.716, "eval_steps_per_second": 2.218, "eval_wer": 0.217360659292995, "step": 2200 }, { "epoch": 18.4, "learning_rate": 9.823939151321058e-05, "loss": 0.0228, "step": 2300 }, { "epoch": 18.4, "eval_loss": 0.32169201970100403, "eval_runtime": 193.9, "eval_samples_per_second": 18.54, "eval_steps_per_second": 2.321, "eval_wer": 0.21757753198872262, "step": 2300 }, { "epoch": 19.2, "learning_rate": 9.815932746196959e-05, "loss": 0.0217, "step": 2400 }, { "epoch": 19.2, "eval_loss": 0.3112569749355316, "eval_runtime": 198.2135, "eval_samples_per_second": 18.137, "eval_steps_per_second": 2.27, "eval_wer": 0.2170895684233355, "step": 2400 }, { "epoch": 19.99, "learning_rate": 9.807926341072858e-05, "loss": 0.0212, "step": 2500 }, { "epoch": 19.99, "eval_loss": 0.31596991419792175, "eval_runtime": 199.2987, "eval_samples_per_second": 18.038, "eval_steps_per_second": 2.258, "eval_wer": 0.21351116894382996, "step": 2500 }, { "epoch": 20.79, "learning_rate": 9.79991993594876e-05, "loss": 0.0216, "step": 2600 }, { "epoch": 20.79, "eval_loss": 0.3226161003112793, "eval_runtime": 199.747, "eval_samples_per_second": 17.998, "eval_steps_per_second": 2.253, "eval_wer": 0.21378225981348947, "step": 2600 }, { "epoch": 21.6, "learning_rate": 9.79191353082466e-05, "loss": 0.0242, "step": 2700 }, { "epoch": 21.6, "eval_loss": 0.3281223177909851, "eval_runtime": 197.1976, "eval_samples_per_second": 18.23, "eval_steps_per_second": 2.282, "eval_wer": 0.2169269139015398, "step": 2700 }, { "epoch": 22.4, "learning_rate": 9.783907125700561e-05, "loss": 0.0245, "step": 2800 }, { "epoch": 22.4, "eval_loss": 0.3078162968158722, "eval_runtime": 198.6659, "eval_samples_per_second": 18.096, "eval_steps_per_second": 2.265, "eval_wer": 0.2176317501626545, "step": 2800 }, { "epoch": 23.2, "learning_rate": 9.775900720576461e-05, "loss": 0.0229, "step": 2900 }, { "epoch": 23.2, "eval_loss": 0.31995320320129395, "eval_runtime": 204.328, "eval_samples_per_second": 17.594, "eval_steps_per_second": 2.202, "eval_wer": 0.22045109520711342, "step": 2900 }, { "epoch": 23.99, "learning_rate": 9.767894315452363e-05, "loss": 0.0226, "step": 3000 }, { "epoch": 23.99, "eval_loss": 0.33075791597366333, "eval_runtime": 205.0698, "eval_samples_per_second": 17.531, "eval_steps_per_second": 2.194, "eval_wer": 0.2171437865972674, "step": 3000 }, { "epoch": 24.79, "learning_rate": 9.759887910328262e-05, "loss": 0.0213, "step": 3100 }, { "epoch": 24.79, "eval_loss": 0.3419627845287323, "eval_runtime": 206.2312, "eval_samples_per_second": 17.432, "eval_steps_per_second": 2.182, "eval_wer": 0.21936673172847537, "step": 3100 }, { "epoch": 25.6, "learning_rate": 9.751881505204163e-05, "loss": 0.0212, "step": 3200 }, { "epoch": 25.6, "eval_loss": 0.3389272093772888, "eval_runtime": 205.5665, "eval_samples_per_second": 17.488, "eval_steps_per_second": 2.189, "eval_wer": 0.21519193233571893, "step": 3200 }, { "epoch": 26.4, "learning_rate": 9.743875100080065e-05, "loss": 0.0208, "step": 3300 }, { "epoch": 26.4, "eval_loss": 0.32781022787094116, "eval_runtime": 208.0862, "eval_samples_per_second": 17.276, "eval_steps_per_second": 2.163, "eval_wer": 0.21275211450878334, "step": 3300 }, { "epoch": 27.2, "learning_rate": 9.735868694955965e-05, "loss": 0.0212, "step": 3400 }, { "epoch": 27.2, "eval_loss": 0.3169388771057129, "eval_runtime": 206.029, "eval_samples_per_second": 17.449, "eval_steps_per_second": 2.184, "eval_wer": 0.21497505963999133, "step": 3400 }, { "epoch": 27.99, "learning_rate": 9.727862289831866e-05, "loss": 0.0213, "step": 3500 }, { "epoch": 27.99, "eval_loss": 0.31786179542541504, "eval_runtime": 198.632, "eval_samples_per_second": 18.099, "eval_steps_per_second": 2.265, "eval_wer": 0.21757753198872262, "step": 3500 }, { "epoch": 28.79, "learning_rate": 9.719855884707766e-05, "loss": 0.0213, "step": 3600 }, { "epoch": 28.79, "eval_loss": 0.32053136825561523, "eval_runtime": 201.1109, "eval_samples_per_second": 17.876, "eval_steps_per_second": 2.238, "eval_wer": 0.2203426588592496, "step": 3600 }, { "epoch": 29.6, "learning_rate": 9.711849479583668e-05, "loss": 0.0217, "step": 3700 }, { "epoch": 29.6, "eval_loss": 0.33174052834510803, "eval_runtime": 191.0157, "eval_samples_per_second": 18.82, "eval_steps_per_second": 2.356, "eval_wer": 0.2168726957276079, "step": 3700 }, { "epoch": 30.4, "learning_rate": 9.703843074459567e-05, "loss": 0.0221, "step": 3800 }, { "epoch": 30.4, "eval_loss": 0.3100583255290985, "eval_runtime": 190.6873, "eval_samples_per_second": 18.853, "eval_steps_per_second": 2.36, "eval_wer": 0.21774018651051832, "step": 3800 }, { "epoch": 31.2, "learning_rate": 9.695836669335468e-05, "loss": 0.021, "step": 3900 }, { "epoch": 31.2, "eval_loss": 0.33132240176200867, "eval_runtime": 187.5625, "eval_samples_per_second": 19.167, "eval_steps_per_second": 2.399, "eval_wer": 0.21741487746692692, "step": 3900 }, { "epoch": 31.99, "learning_rate": 9.68783026421137e-05, "loss": 0.0212, "step": 4000 }, { "epoch": 31.99, "eval_loss": 0.32063281536102295, "eval_runtime": 192.7124, "eval_samples_per_second": 18.655, "eval_steps_per_second": 2.335, "eval_wer": 0.21340273259596618, "step": 4000 } ], "max_steps": 125000, "num_train_epochs": 1000, "total_flos": 9.112861160368682e+19, "trial_name": null, "trial_params": null }