{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 31.99304865938431,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.79,
      "learning_rate": 0.0001,
      "loss": 0.0392,
      "step": 100
    },
    {
      "epoch": 0.79,
      "eval_loss": 0.23765751719474792,
      "eval_runtime": 220.5891,
      "eval_samples_per_second": 16.297,
      "eval_steps_per_second": 2.04,
      "eval_wer": 0.21887876816308827,
      "step": 100
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.991993594875901e-05,
      "loss": 0.0336,
      "step": 200
    },
    {
      "epoch": 1.6,
      "eval_loss": 0.26165521144866943,
      "eval_runtime": 157.9726,
      "eval_samples_per_second": 22.757,
      "eval_steps_per_second": 2.849,
      "eval_wer": 0.21649316851008457,
      "step": 200
    },
    {
      "epoch": 2.4,
      "learning_rate": 9.983987189751803e-05,
      "loss": 0.0293,
      "step": 300
    },
    {
      "epoch": 2.4,
      "eval_loss": 0.28323182463645935,
      "eval_runtime": 169.3448,
      "eval_samples_per_second": 21.229,
      "eval_steps_per_second": 2.657,
      "eval_wer": 0.2197462589459987,
      "step": 300
    },
    {
      "epoch": 3.2,
      "learning_rate": 9.975980784627703e-05,
      "loss": 0.0283,
      "step": 400
    },
    {
      "epoch": 3.2,
      "eval_loss": 0.29311421513557434,
      "eval_runtime": 175.6178,
      "eval_samples_per_second": 20.471,
      "eval_steps_per_second": 2.562,
      "eval_wer": 0.21508349598785512,
      "step": 400
    },
    {
      "epoch": 3.99,
      "learning_rate": 9.967974379503603e-05,
      "loss": 0.0274,
      "step": 500
    },
    {
      "epoch": 3.99,
      "eval_loss": 0.30566948652267456,
      "eval_runtime": 182.1945,
      "eval_samples_per_second": 19.732,
      "eval_steps_per_second": 2.47,
      "eval_wer": 0.21828236824983735,
      "step": 500
    },
    {
      "epoch": 4.79,
      "learning_rate": 9.959967974379504e-05,
      "loss": 0.0264,
      "step": 600
    },
    {
      "epoch": 4.79,
      "eval_loss": 0.30197781324386597,
      "eval_runtime": 174.7969,
      "eval_samples_per_second": 20.567,
      "eval_steps_per_second": 2.574,
      "eval_wer": 0.2171437865972674,
      "step": 600
    },
    {
      "epoch": 5.6,
      "learning_rate": 9.951961569255405e-05,
      "loss": 0.0259,
      "step": 700
    },
    {
      "epoch": 5.6,
      "eval_loss": 0.3002428412437439,
      "eval_runtime": 191.2838,
      "eval_samples_per_second": 18.794,
      "eval_steps_per_second": 2.353,
      "eval_wer": 0.21725222294513122,
      "step": 700
    },
    {
      "epoch": 6.4,
      "learning_rate": 9.943955164131305e-05,
      "loss": 0.0254,
      "step": 800
    },
    {
      "epoch": 6.4,
      "eval_loss": 0.3097267746925354,
      "eval_runtime": 183.9687,
      "eval_samples_per_second": 19.541,
      "eval_steps_per_second": 2.446,
      "eval_wer": 0.21752331381479073,
      "step": 800
    },
    {
      "epoch": 7.2,
      "learning_rate": 9.936028823058447e-05,
      "loss": 0.0252,
      "step": 900
    },
    {
      "epoch": 7.2,
      "eval_loss": 0.2970833480358124,
      "eval_runtime": 182.1563,
      "eval_samples_per_second": 19.736,
      "eval_steps_per_second": 2.47,
      "eval_wer": 0.2170353502494036,
      "step": 900
    },
    {
      "epoch": 7.99,
      "learning_rate": 9.928022417934349e-05,
      "loss": 0.0234,
      "step": 1000
    },
    {
      "epoch": 7.99,
      "eval_loss": 0.31429246068000793,
      "eval_runtime": 193.7999,
      "eval_samples_per_second": 18.55,
      "eval_steps_per_second": 2.322,
      "eval_wer": 0.21405335068314899,
      "step": 1000
    },
    {
      "epoch": 8.79,
      "learning_rate": 9.920016012810248e-05,
      "loss": 0.0228,
      "step": 1100
    },
    {
      "epoch": 8.79,
      "eval_loss": 0.32985326647758484,
      "eval_runtime": 194.3544,
      "eval_samples_per_second": 18.497,
      "eval_steps_per_second": 2.315,
      "eval_wer": 0.2142702233788766,
      "step": 1100
    },
    {
      "epoch": 9.6,
      "learning_rate": 9.912009607686149e-05,
      "loss": 0.0236,
      "step": 1200
    },
    {
      "epoch": 9.6,
      "eval_loss": 0.3165593147277832,
      "eval_runtime": 185.0001,
      "eval_samples_per_second": 19.432,
      "eval_steps_per_second": 2.432,
      "eval_wer": 0.21833658642376924,
      "step": 1200
    },
    {
      "epoch": 10.4,
      "learning_rate": 9.90400320256205e-05,
      "loss": 0.0241,
      "step": 1300
    },
    {
      "epoch": 10.4,
      "eval_loss": 0.3285478949546814,
      "eval_runtime": 181.9532,
      "eval_samples_per_second": 19.758,
      "eval_steps_per_second": 2.473,
      "eval_wer": 0.2192582953806116,
      "step": 1300
    },
    {
      "epoch": 11.2,
      "learning_rate": 9.895996797437951e-05,
      "loss": 0.0243,
      "step": 1400
    },
    {
      "epoch": 11.2,
      "eval_loss": 0.3187803626060486,
      "eval_runtime": 200.4409,
      "eval_samples_per_second": 17.935,
      "eval_steps_per_second": 2.245,
      "eval_wer": 0.22099327694643245,
      "step": 1400
    },
    {
      "epoch": 11.99,
      "learning_rate": 9.887990392313852e-05,
      "loss": 0.026,
      "step": 1500
    },
    {
      "epoch": 11.99,
      "eval_loss": 0.32988375425338745,
      "eval_runtime": 199.5224,
      "eval_samples_per_second": 18.018,
      "eval_steps_per_second": 2.255,
      "eval_wer": 0.22375840381695944,
      "step": 1500
    },
    {
      "epoch": 12.79,
      "learning_rate": 9.879983987189752e-05,
      "loss": 0.0259,
      "step": 1600
    },
    {
      "epoch": 12.79,
      "eval_loss": 0.3099309206008911,
      "eval_runtime": 198.0592,
      "eval_samples_per_second": 18.151,
      "eval_steps_per_second": 2.272,
      "eval_wer": 0.22045109520711342,
      "step": 1600
    },
    {
      "epoch": 13.6,
      "learning_rate": 9.871977582065654e-05,
      "loss": 0.0255,
      "step": 1700
    },
    {
      "epoch": 13.6,
      "eval_loss": 0.30543622374534607,
      "eval_runtime": 202.823,
      "eval_samples_per_second": 17.725,
      "eval_steps_per_second": 2.219,
      "eval_wer": 0.22402949468661895,
      "step": 1700
    },
    {
      "epoch": 14.4,
      "learning_rate": 9.863971176941553e-05,
      "loss": 0.0253,
      "step": 1800
    },
    {
      "epoch": 14.4,
      "eval_loss": 0.31324318051338196,
      "eval_runtime": 199.664,
      "eval_samples_per_second": 18.005,
      "eval_steps_per_second": 2.254,
      "eval_wer": 0.21779440468445022,
      "step": 1800
    },
    {
      "epoch": 15.2,
      "learning_rate": 9.855964771817454e-05,
      "loss": 0.0244,
      "step": 1900
    },
    {
      "epoch": 15.2,
      "eval_loss": 0.33187857270240784,
      "eval_runtime": 197.0859,
      "eval_samples_per_second": 18.241,
      "eval_steps_per_second": 2.283,
      "eval_wer": 0.22121014964216004,
      "step": 1900
    },
    {
      "epoch": 15.99,
      "learning_rate": 9.847958366693355e-05,
      "loss": 0.0231,
      "step": 2000
    },
    {
      "epoch": 15.99,
      "eval_loss": 0.33831512928009033,
      "eval_runtime": 200.252,
      "eval_samples_per_second": 17.952,
      "eval_steps_per_second": 2.247,
      "eval_wer": 0.21920407720667967,
      "step": 2000
    },
    {
      "epoch": 16.79,
      "learning_rate": 9.839951961569256e-05,
      "loss": 0.0235,
      "step": 2100
    },
    {
      "epoch": 16.79,
      "eval_loss": 0.31389620900154114,
      "eval_runtime": 198.2191,
      "eval_samples_per_second": 18.136,
      "eval_steps_per_second": 2.27,
      "eval_wer": 0.21622207764042506,
      "step": 2100
    },
    {
      "epoch": 17.6,
      "learning_rate": 9.831945556445156e-05,
      "loss": 0.0227,
      "step": 2200
    },
    {
      "epoch": 17.6,
      "eval_loss": 0.32037118077278137,
      "eval_runtime": 202.9198,
      "eval_samples_per_second": 17.716,
      "eval_steps_per_second": 2.218,
      "eval_wer": 0.217360659292995,
      "step": 2200
    },
    {
      "epoch": 18.4,
      "learning_rate": 9.823939151321058e-05,
      "loss": 0.0228,
      "step": 2300
    },
    {
      "epoch": 18.4,
      "eval_loss": 0.32169201970100403,
      "eval_runtime": 193.9,
      "eval_samples_per_second": 18.54,
      "eval_steps_per_second": 2.321,
      "eval_wer": 0.21757753198872262,
      "step": 2300
    },
    {
      "epoch": 19.2,
      "learning_rate": 9.815932746196959e-05,
      "loss": 0.0217,
      "step": 2400
    },
    {
      "epoch": 19.2,
      "eval_loss": 0.3112569749355316,
      "eval_runtime": 198.2135,
      "eval_samples_per_second": 18.137,
      "eval_steps_per_second": 2.27,
      "eval_wer": 0.2170895684233355,
      "step": 2400
    },
    {
      "epoch": 19.99,
      "learning_rate": 9.807926341072858e-05,
      "loss": 0.0212,
      "step": 2500
    },
    {
      "epoch": 19.99,
      "eval_loss": 0.31596991419792175,
      "eval_runtime": 199.2987,
      "eval_samples_per_second": 18.038,
      "eval_steps_per_second": 2.258,
      "eval_wer": 0.21351116894382996,
      "step": 2500
    },
    {
      "epoch": 20.79,
      "learning_rate": 9.79991993594876e-05,
      "loss": 0.0216,
      "step": 2600
    },
    {
      "epoch": 20.79,
      "eval_loss": 0.3226161003112793,
      "eval_runtime": 199.747,
      "eval_samples_per_second": 17.998,
      "eval_steps_per_second": 2.253,
      "eval_wer": 0.21378225981348947,
      "step": 2600
    },
    {
      "epoch": 21.6,
      "learning_rate": 9.79191353082466e-05,
      "loss": 0.0242,
      "step": 2700
    },
    {
      "epoch": 21.6,
      "eval_loss": 0.3281223177909851,
      "eval_runtime": 197.1976,
      "eval_samples_per_second": 18.23,
      "eval_steps_per_second": 2.282,
      "eval_wer": 0.2169269139015398,
      "step": 2700
    },
    {
      "epoch": 22.4,
      "learning_rate": 9.783907125700561e-05,
      "loss": 0.0245,
      "step": 2800
    },
    {
      "epoch": 22.4,
      "eval_loss": 0.3078162968158722,
      "eval_runtime": 198.6659,
      "eval_samples_per_second": 18.096,
      "eval_steps_per_second": 2.265,
      "eval_wer": 0.2176317501626545,
      "step": 2800
    },
    {
      "epoch": 23.2,
      "learning_rate": 9.775900720576461e-05,
      "loss": 0.0229,
      "step": 2900
    },
    {
      "epoch": 23.2,
      "eval_loss": 0.31995320320129395,
      "eval_runtime": 204.328,
      "eval_samples_per_second": 17.594,
      "eval_steps_per_second": 2.202,
      "eval_wer": 0.22045109520711342,
      "step": 2900
    },
    {
      "epoch": 23.99,
      "learning_rate": 9.767894315452363e-05,
      "loss": 0.0226,
      "step": 3000
    },
    {
      "epoch": 23.99,
      "eval_loss": 0.33075791597366333,
      "eval_runtime": 205.0698,
      "eval_samples_per_second": 17.531,
      "eval_steps_per_second": 2.194,
      "eval_wer": 0.2171437865972674,
      "step": 3000
    },
    {
      "epoch": 24.79,
      "learning_rate": 9.759887910328262e-05,
      "loss": 0.0213,
      "step": 3100
    },
    {
      "epoch": 24.79,
      "eval_loss": 0.3419627845287323,
      "eval_runtime": 206.2312,
      "eval_samples_per_second": 17.432,
      "eval_steps_per_second": 2.182,
      "eval_wer": 0.21936673172847537,
      "step": 3100
    },
    {
      "epoch": 25.6,
      "learning_rate": 9.751881505204163e-05,
      "loss": 0.0212,
      "step": 3200
    },
    {
      "epoch": 25.6,
      "eval_loss": 0.3389272093772888,
      "eval_runtime": 205.5665,
      "eval_samples_per_second": 17.488,
      "eval_steps_per_second": 2.189,
      "eval_wer": 0.21519193233571893,
      "step": 3200
    },
    {
      "epoch": 26.4,
      "learning_rate": 9.743875100080065e-05,
      "loss": 0.0208,
      "step": 3300
    },
    {
      "epoch": 26.4,
      "eval_loss": 0.32781022787094116,
      "eval_runtime": 208.0862,
      "eval_samples_per_second": 17.276,
      "eval_steps_per_second": 2.163,
      "eval_wer": 0.21275211450878334,
      "step": 3300
    },
    {
      "epoch": 27.2,
      "learning_rate": 9.735868694955965e-05,
      "loss": 0.0212,
      "step": 3400
    },
    {
      "epoch": 27.2,
      "eval_loss": 0.3169388771057129,
      "eval_runtime": 206.029,
      "eval_samples_per_second": 17.449,
      "eval_steps_per_second": 2.184,
      "eval_wer": 0.21497505963999133,
      "step": 3400
    },
    {
      "epoch": 27.99,
      "learning_rate": 9.727862289831866e-05,
      "loss": 0.0213,
      "step": 3500
    },
    {
      "epoch": 27.99,
      "eval_loss": 0.31786179542541504,
      "eval_runtime": 198.632,
      "eval_samples_per_second": 18.099,
      "eval_steps_per_second": 2.265,
      "eval_wer": 0.21757753198872262,
      "step": 3500
    },
    {
      "epoch": 28.79,
      "learning_rate": 9.719855884707766e-05,
      "loss": 0.0213,
      "step": 3600
    },
    {
      "epoch": 28.79,
      "eval_loss": 0.32053136825561523,
      "eval_runtime": 201.1109,
      "eval_samples_per_second": 17.876,
      "eval_steps_per_second": 2.238,
      "eval_wer": 0.2203426588592496,
      "step": 3600
    },
    {
      "epoch": 29.6,
      "learning_rate": 9.711849479583668e-05,
      "loss": 0.0217,
      "step": 3700
    },
    {
      "epoch": 29.6,
      "eval_loss": 0.33174052834510803,
      "eval_runtime": 191.0157,
      "eval_samples_per_second": 18.82,
      "eval_steps_per_second": 2.356,
      "eval_wer": 0.2168726957276079,
      "step": 3700
    },
    {
      "epoch": 30.4,
      "learning_rate": 9.703843074459567e-05,
      "loss": 0.0221,
      "step": 3800
    },
    {
      "epoch": 30.4,
      "eval_loss": 0.3100583255290985,
      "eval_runtime": 190.6873,
      "eval_samples_per_second": 18.853,
      "eval_steps_per_second": 2.36,
      "eval_wer": 0.21774018651051832,
      "step": 3800
    },
    {
      "epoch": 31.2,
      "learning_rate": 9.695836669335468e-05,
      "loss": 0.021,
      "step": 3900
    },
    {
      "epoch": 31.2,
      "eval_loss": 0.33132240176200867,
      "eval_runtime": 187.5625,
      "eval_samples_per_second": 19.167,
      "eval_steps_per_second": 2.399,
      "eval_wer": 0.21741487746692692,
      "step": 3900
    },
    {
      "epoch": 31.99,
      "learning_rate": 9.68783026421137e-05,
      "loss": 0.0212,
      "step": 4000
    },
    {
      "epoch": 31.99,
      "eval_loss": 0.32063281536102295,
      "eval_runtime": 192.7124,
      "eval_samples_per_second": 18.655,
      "eval_steps_per_second": 2.335,
      "eval_wer": 0.21340273259596618,
      "step": 4000
    }
  ],
  "max_steps": 125000,
  "num_train_epochs": 1000,
  "total_flos": 9.112861160368682e+19,
  "trial_name": null,
  "trial_params": null
}