|
{ |
|
"best_metric": 10.642644873699851, |
|
"best_model_checkpoint": "./checkpoint-5000", |
|
"epoch": 293.6470588235294, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 5.0453611334320685e-06, |
|
"loss": 0.6804, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 6.229195710491767e-06, |
|
"loss": 0.1847, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 6.903829450223392e-06, |
|
"loss": 0.0821, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 7.377725845391017e-06, |
|
"loss": 0.0485, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 7.743343231239583e-06, |
|
"loss": 0.0432, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 8.041073861170494e-06, |
|
"loss": 0.0328, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"learning_rate": 8.292222957399574e-06, |
|
"loss": 0.0291, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"learning_rate": 8.509413541357755e-06, |
|
"loss": 0.0298, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 8.700744577655557e-06, |
|
"loss": 0.0269, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 27.78, |
|
"learning_rate": 8.871723942761204e-06, |
|
"loss": 0.0272, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 30.56, |
|
"learning_rate": 9.026267958246849e-06, |
|
"loss": 0.027, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 9.16726106663399e-06, |
|
"loss": 0.0213, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 36.11, |
|
"learning_rate": 9.296889251455016e-06, |
|
"loss": 0.0215, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 38.89, |
|
"learning_rate": 9.416848797368692e-06, |
|
"loss": 0.0195, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 41.67, |
|
"learning_rate": 9.528482449516371e-06, |
|
"loss": 0.0167, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 44.44, |
|
"learning_rate": 9.632871309784314e-06, |
|
"loss": 0.0184, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 47.22, |
|
"learning_rate": 9.73089868785391e-06, |
|
"loss": 0.0159, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 9.823295589572114e-06, |
|
"loss": 0.0172, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 52.78, |
|
"learning_rate": 9.910673836465484e-06, |
|
"loss": 0.0123, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 55.56, |
|
"learning_rate": 9.993550644973805e-06, |
|
"loss": 0.0144, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"learning_rate": 9.951111111111111e-06, |
|
"loss": 0.0135, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 61.11, |
|
"learning_rate": 9.895555555555557e-06, |
|
"loss": 0.0128, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 63.89, |
|
"learning_rate": 9.84e-06, |
|
"loss": 0.0115, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 9.784444444444445e-06, |
|
"loss": 0.0105, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 69.44, |
|
"learning_rate": 9.72888888888889e-06, |
|
"loss": 0.0104, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 72.22, |
|
"learning_rate": 9.673333333333334e-06, |
|
"loss": 0.0087, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 9.617777777777778e-06, |
|
"loss": 0.0091, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 77.78, |
|
"learning_rate": 9.562222222222223e-06, |
|
"loss": 0.0085, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 80.56, |
|
"learning_rate": 9.506666666666667e-06, |
|
"loss": 0.011, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 83.33, |
|
"learning_rate": 9.451111111111112e-06, |
|
"loss": 0.0117, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 86.11, |
|
"learning_rate": 9.395555555555556e-06, |
|
"loss": 0.0088, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 88.89, |
|
"learning_rate": 9.340000000000002e-06, |
|
"loss": 0.0077, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 91.67, |
|
"learning_rate": 9.284444444444444e-06, |
|
"loss": 0.0091, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 94.44, |
|
"learning_rate": 9.22888888888889e-06, |
|
"loss": 0.0067, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 97.22, |
|
"learning_rate": 9.173333333333334e-06, |
|
"loss": 0.0082, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 9.117777777777778e-06, |
|
"loss": 0.0055, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 102.78, |
|
"learning_rate": 9.062222222222224e-06, |
|
"loss": 0.0077, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 105.56, |
|
"learning_rate": 9.006666666666666e-06, |
|
"loss": 0.0055, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 108.33, |
|
"learning_rate": 8.951111111111112e-06, |
|
"loss": 0.005, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 111.11, |
|
"learning_rate": 8.895555555555556e-06, |
|
"loss": 0.0066, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 111.11, |
|
"eval_loss": 0.2357177734375, |
|
"eval_runtime": 64.7785, |
|
"eval_samples_per_second": 2.022, |
|
"eval_steps_per_second": 0.139, |
|
"eval_wer": 23.044096728307252, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 113.89, |
|
"learning_rate": 8.844444444444445e-06, |
|
"loss": 0.0057, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 116.67, |
|
"learning_rate": 8.788888888888891e-06, |
|
"loss": 0.0096, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 119.44, |
|
"learning_rate": 8.733333333333333e-06, |
|
"loss": 0.0063, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 122.22, |
|
"learning_rate": 8.677777777777779e-06, |
|
"loss": 0.0069, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 8.622222222222223e-06, |
|
"loss": 0.0069, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 127.78, |
|
"learning_rate": 8.566666666666667e-06, |
|
"loss": 0.0046, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 130.56, |
|
"learning_rate": 8.511111111111113e-06, |
|
"loss": 0.0051, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 133.33, |
|
"learning_rate": 8.455555555555555e-06, |
|
"loss": 0.0055, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 136.11, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.0042, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 138.89, |
|
"learning_rate": 8.344444444444445e-06, |
|
"loss": 0.0042, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 141.67, |
|
"learning_rate": 8.288888888888889e-06, |
|
"loss": 0.005, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 144.44, |
|
"learning_rate": 8.233333333333335e-06, |
|
"loss": 0.0054, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 147.22, |
|
"learning_rate": 8.177777777777779e-06, |
|
"loss": 0.0052, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 8.122222222222223e-06, |
|
"loss": 0.0057, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 152.78, |
|
"learning_rate": 8.066666666666667e-06, |
|
"loss": 0.0039, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 155.56, |
|
"learning_rate": 8.011111111111113e-06, |
|
"loss": 0.0032, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 158.33, |
|
"learning_rate": 7.955555555555557e-06, |
|
"loss": 0.0034, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 161.11, |
|
"learning_rate": 7.902222222222223e-06, |
|
"loss": 0.0068, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 163.89, |
|
"learning_rate": 7.846666666666667e-06, |
|
"loss": 0.0034, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 166.67, |
|
"learning_rate": 7.791111111111111e-06, |
|
"loss": 0.0026, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 169.44, |
|
"learning_rate": 7.735555555555557e-06, |
|
"loss": 0.0036, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 172.22, |
|
"learning_rate": 7.680000000000001e-06, |
|
"loss": 0.0033, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"learning_rate": 7.624444444444445e-06, |
|
"loss": 0.0021, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 177.78, |
|
"learning_rate": 7.56888888888889e-06, |
|
"loss": 0.0033, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 180.56, |
|
"learning_rate": 7.513333333333334e-06, |
|
"loss": 0.0037, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 183.33, |
|
"learning_rate": 7.457777777777778e-06, |
|
"loss": 0.0032, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 186.11, |
|
"learning_rate": 7.402222222222223e-06, |
|
"loss": 0.0037, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 188.89, |
|
"learning_rate": 7.346666666666668e-06, |
|
"loss": 0.0022, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 191.67, |
|
"learning_rate": 7.291111111111112e-06, |
|
"loss": 0.0024, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 194.44, |
|
"learning_rate": 7.235555555555556e-06, |
|
"loss": 0.0026, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 197.22, |
|
"learning_rate": 7.180000000000001e-06, |
|
"loss": 0.0022, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 7.124444444444445e-06, |
|
"loss": 0.0026, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 202.78, |
|
"learning_rate": 7.06888888888889e-06, |
|
"loss": 0.0032, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 205.56, |
|
"learning_rate": 7.0133333333333345e-06, |
|
"loss": 0.0033, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 208.33, |
|
"learning_rate": 6.9577777777777785e-06, |
|
"loss": 0.0027, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 211.11, |
|
"learning_rate": 6.902222222222223e-06, |
|
"loss": 0.0043, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 213.89, |
|
"learning_rate": 6.846666666666667e-06, |
|
"loss": 0.0028, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 216.67, |
|
"learning_rate": 6.7911111111111115e-06, |
|
"loss": 0.0012, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 219.44, |
|
"learning_rate": 6.735555555555556e-06, |
|
"loss": 0.0015, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 222.22, |
|
"learning_rate": 6.680000000000001e-06, |
|
"loss": 0.0024, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 222.22, |
|
"eval_loss": 0.2607421875, |
|
"eval_runtime": 57.0802, |
|
"eval_samples_per_second": 2.295, |
|
"eval_steps_per_second": 0.158, |
|
"eval_wer": 19.665718349928877, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 225.0, |
|
"learning_rate": 6.6244444444444445e-06, |
|
"loss": 0.0029, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 227.78, |
|
"learning_rate": 6.568888888888889e-06, |
|
"loss": 0.0021, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 230.56, |
|
"learning_rate": 6.513333333333333e-06, |
|
"loss": 0.0022, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 233.33, |
|
"learning_rate": 6.457777777777778e-06, |
|
"loss": 0.0022, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 236.11, |
|
"learning_rate": 6.402222222222223e-06, |
|
"loss": 0.0011, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 238.89, |
|
"learning_rate": 6.346666666666668e-06, |
|
"loss": 0.0026, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 241.67, |
|
"learning_rate": 6.291111111111111e-06, |
|
"loss": 0.0021, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 244.44, |
|
"learning_rate": 6.235555555555556e-06, |
|
"loss": 0.0016, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 247.22, |
|
"learning_rate": 6.18e-06, |
|
"loss": 0.0024, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 6.124444444444445e-06, |
|
"loss": 0.0046, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 252.78, |
|
"learning_rate": 6.06888888888889e-06, |
|
"loss": 0.0018, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 255.56, |
|
"learning_rate": 6.013333333333335e-06, |
|
"loss": 0.0012, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 258.33, |
|
"learning_rate": 5.957777777777778e-06, |
|
"loss": 0.0014, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 261.11, |
|
"learning_rate": 5.902222222222223e-06, |
|
"loss": 0.0007, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 263.89, |
|
"learning_rate": 5.846666666666667e-06, |
|
"loss": 0.0014, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 266.67, |
|
"learning_rate": 5.791111111111112e-06, |
|
"loss": 0.0009, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 269.44, |
|
"learning_rate": 5.735555555555557e-06, |
|
"loss": 0.0008, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 272.22, |
|
"learning_rate": 5.68e-06, |
|
"loss": 0.0028, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 275.0, |
|
"learning_rate": 5.624444444444445e-06, |
|
"loss": 0.002, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 277.78, |
|
"learning_rate": 5.56888888888889e-06, |
|
"loss": 0.0011, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 280.56, |
|
"learning_rate": 5.513333333333334e-06, |
|
"loss": 0.001, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 283.33, |
|
"learning_rate": 5.4577777777777785e-06, |
|
"loss": 0.0007, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 286.11, |
|
"learning_rate": 5.402222222222223e-06, |
|
"loss": 0.0007, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 288.89, |
|
"learning_rate": 5.346666666666667e-06, |
|
"loss": 0.0008, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 291.67, |
|
"learning_rate": 5.2911111111111115e-06, |
|
"loss": 0.0012, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 294.44, |
|
"learning_rate": 5.235555555555556e-06, |
|
"loss": 0.0016, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 297.22, |
|
"learning_rate": 5.18e-06, |
|
"loss": 0.0012, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"learning_rate": 5.124444444444445e-06, |
|
"loss": 0.001, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 302.78, |
|
"learning_rate": 5.06888888888889e-06, |
|
"loss": 0.0012, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 305.56, |
|
"learning_rate": 5.013333333333333e-06, |
|
"loss": 0.001, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 308.33, |
|
"learning_rate": 4.957777777777778e-06, |
|
"loss": 0.0013, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 311.11, |
|
"learning_rate": 4.902222222222222e-06, |
|
"loss": 0.0015, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 313.89, |
|
"learning_rate": 4.846666666666667e-06, |
|
"loss": 0.0014, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 316.67, |
|
"learning_rate": 4.791111111111111e-06, |
|
"loss": 0.0007, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 319.44, |
|
"learning_rate": 4.735555555555556e-06, |
|
"loss": 0.0009, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 322.22, |
|
"learning_rate": 4.680000000000001e-06, |
|
"loss": 0.0021, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 325.0, |
|
"learning_rate": 4.624444444444445e-06, |
|
"loss": 0.0015, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 327.78, |
|
"learning_rate": 4.568888888888889e-06, |
|
"loss": 0.0012, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 330.56, |
|
"learning_rate": 4.513333333333333e-06, |
|
"loss": 0.0009, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 333.33, |
|
"learning_rate": 4.457777777777778e-06, |
|
"loss": 0.0011, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 333.33, |
|
"eval_loss": 0.277099609375, |
|
"eval_runtime": 58.1634, |
|
"eval_samples_per_second": 2.252, |
|
"eval_steps_per_second": 0.155, |
|
"eval_wer": 20.874822190611663, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 177.47, |
|
"learning_rate": 1.760888888888889e-06, |
|
"loss": 0.5801, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 178.94, |
|
"learning_rate": 1.7386666666666666e-06, |
|
"loss": 0.1501, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 180.41, |
|
"learning_rate": 1.7164444444444444e-06, |
|
"loss": 0.0789, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 181.88, |
|
"learning_rate": 1.6942222222222222e-06, |
|
"loss": 0.0531, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 183.35, |
|
"learning_rate": 1.6719999999999998e-06, |
|
"loss": 0.0409, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 184.82, |
|
"learning_rate": 1.6497777777777777e-06, |
|
"loss": 0.032, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 186.29, |
|
"learning_rate": 1.6275555555555555e-06, |
|
"loss": 0.0251, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 187.76, |
|
"learning_rate": 1.6053333333333333e-06, |
|
"loss": 0.0203, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 189.24, |
|
"learning_rate": 1.5831111111111111e-06, |
|
"loss": 0.0167, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 190.71, |
|
"learning_rate": 1.560888888888889e-06, |
|
"loss": 0.0159, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 192.18, |
|
"learning_rate": 1.5386666666666666e-06, |
|
"loss": 0.0137, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 193.65, |
|
"learning_rate": 1.5164444444444444e-06, |
|
"loss": 0.0122, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 195.12, |
|
"learning_rate": 1.494222222222222e-06, |
|
"loss": 0.0106, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 196.59, |
|
"learning_rate": 1.4719999999999998e-06, |
|
"loss": 0.0094, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 198.06, |
|
"learning_rate": 1.4497777777777777e-06, |
|
"loss": 0.009, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 199.53, |
|
"learning_rate": 1.4275555555555555e-06, |
|
"loss": 0.0104, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 201.0, |
|
"learning_rate": 1.4053333333333333e-06, |
|
"loss": 0.0069, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 202.47, |
|
"learning_rate": 1.3848888888888889e-06, |
|
"loss": 0.0073, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 203.94, |
|
"learning_rate": 1.3626666666666667e-06, |
|
"loss": 0.0073, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 205.41, |
|
"learning_rate": 1.3404444444444445e-06, |
|
"loss": 0.0063, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 206.88, |
|
"learning_rate": 1.3182222222222221e-06, |
|
"loss": 0.007, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 208.35, |
|
"learning_rate": 1.296e-06, |
|
"loss": 0.0061, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 209.82, |
|
"learning_rate": 1.2737777777777776e-06, |
|
"loss": 0.0053, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 211.29, |
|
"learning_rate": 1.2515555555555554e-06, |
|
"loss": 0.0056, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 212.76, |
|
"learning_rate": 1.2293333333333334e-06, |
|
"loss": 0.005, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 214.24, |
|
"learning_rate": 1.207111111111111e-06, |
|
"loss": 0.0047, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 215.71, |
|
"learning_rate": 1.1848888888888889e-06, |
|
"loss": 0.0052, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 217.18, |
|
"learning_rate": 1.1626666666666667e-06, |
|
"loss": 0.0044, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 218.65, |
|
"learning_rate": 1.1404444444444443e-06, |
|
"loss": 0.0046, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 220.12, |
|
"learning_rate": 1.1182222222222221e-06, |
|
"loss": 0.0045, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 221.59, |
|
"learning_rate": 1.096e-06, |
|
"loss": 0.0041, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 223.06, |
|
"learning_rate": 1.0737777777777776e-06, |
|
"loss": 0.0054, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 224.53, |
|
"learning_rate": 1.0515555555555556e-06, |
|
"loss": 0.0038, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 226.0, |
|
"learning_rate": 1.0293333333333334e-06, |
|
"loss": 0.0038, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 227.47, |
|
"learning_rate": 1.007111111111111e-06, |
|
"loss": 0.004, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 228.94, |
|
"learning_rate": 9.848888888888889e-07, |
|
"loss": 0.0036, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 230.41, |
|
"learning_rate": 9.626666666666667e-07, |
|
"loss": 0.0041, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 231.88, |
|
"learning_rate": 9.404444444444443e-07, |
|
"loss": 0.0032, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 233.35, |
|
"learning_rate": 9.182222222222223e-07, |
|
"loss": 0.0038, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 234.82, |
|
"learning_rate": 8.96e-07, |
|
"loss": 0.0043, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 234.82, |
|
"eval_loss": 0.45361328125, |
|
"eval_runtime": 157.593, |
|
"eval_samples_per_second": 1.726, |
|
"eval_steps_per_second": 0.108, |
|
"eval_wer": 10.707652303120357, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 236.29, |
|
"learning_rate": 8.737777777777777e-07, |
|
"loss": 0.004, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 237.76, |
|
"learning_rate": 8.515555555555555e-07, |
|
"loss": 0.0029, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 239.24, |
|
"learning_rate": 8.293333333333333e-07, |
|
"loss": 0.0034, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 240.71, |
|
"learning_rate": 8.071111111111111e-07, |
|
"loss": 0.0032, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 242.18, |
|
"learning_rate": 7.848888888888888e-07, |
|
"loss": 0.003, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 243.65, |
|
"learning_rate": 7.626666666666667e-07, |
|
"loss": 0.0034, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 245.12, |
|
"learning_rate": 7.404444444444444e-07, |
|
"loss": 0.0032, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 246.59, |
|
"learning_rate": 7.182222222222222e-07, |
|
"loss": 0.0032, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 248.06, |
|
"learning_rate": 6.959999999999999e-07, |
|
"loss": 0.0028, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 249.53, |
|
"learning_rate": 6.737777777777778e-07, |
|
"loss": 0.0028, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 251.0, |
|
"learning_rate": 6.515555555555555e-07, |
|
"loss": 0.0025, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 252.47, |
|
"learning_rate": 6.293333333333333e-07, |
|
"loss": 0.0026, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 253.94, |
|
"learning_rate": 6.071111111111111e-07, |
|
"loss": 0.003, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 255.41, |
|
"learning_rate": 5.848888888888889e-07, |
|
"loss": 0.0026, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 256.88, |
|
"learning_rate": 5.626666666666666e-07, |
|
"loss": 0.0027, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 258.35, |
|
"learning_rate": 5.404444444444443e-07, |
|
"loss": 0.003, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 259.82, |
|
"learning_rate": 5.182222222222223e-07, |
|
"loss": 0.0027, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 261.29, |
|
"learning_rate": 4.977777777777777e-07, |
|
"loss": 0.0026, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 262.76, |
|
"learning_rate": 4.7555555555555554e-07, |
|
"loss": 0.0023, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 264.24, |
|
"learning_rate": 4.5333333333333326e-07, |
|
"loss": 0.0021, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 265.71, |
|
"learning_rate": 4.311111111111111e-07, |
|
"loss": 0.0022, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 267.18, |
|
"learning_rate": 4.088888888888889e-07, |
|
"loss": 0.0034, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 268.65, |
|
"learning_rate": 3.8666666666666664e-07, |
|
"loss": 0.0023, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 270.12, |
|
"learning_rate": 3.6444444444444446e-07, |
|
"loss": 0.0022, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 271.59, |
|
"learning_rate": 3.422222222222222e-07, |
|
"loss": 0.0022, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 273.06, |
|
"learning_rate": 3.2e-07, |
|
"loss": 0.0024, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 274.53, |
|
"learning_rate": 2.9777777777777773e-07, |
|
"loss": 0.0031, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"learning_rate": 2.7555555555555555e-07, |
|
"loss": 0.0022, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 277.47, |
|
"learning_rate": 2.533333333333333e-07, |
|
"loss": 0.0022, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 278.94, |
|
"learning_rate": 2.311111111111111e-07, |
|
"loss": 0.0021, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 280.41, |
|
"learning_rate": 2.088888888888889e-07, |
|
"loss": 0.0023, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 281.88, |
|
"learning_rate": 1.8666666666666667e-07, |
|
"loss": 0.0025, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 283.35, |
|
"learning_rate": 1.6444444444444444e-07, |
|
"loss": 0.0022, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 284.82, |
|
"learning_rate": 1.4222222222222222e-07, |
|
"loss": 0.0022, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 286.29, |
|
"learning_rate": 1.2e-07, |
|
"loss": 0.0021, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 287.76, |
|
"learning_rate": 9.777777777777778e-08, |
|
"loss": 0.0023, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 289.24, |
|
"learning_rate": 7.555555555555555e-08, |
|
"loss": 0.002, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 290.71, |
|
"learning_rate": 5.3333333333333334e-08, |
|
"loss": 0.0025, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 292.18, |
|
"learning_rate": 3.111111111111111e-08, |
|
"loss": 0.002, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 293.65, |
|
"learning_rate": 8.888888888888889e-09, |
|
"loss": 0.0024, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 293.65, |
|
"eval_loss": 0.465576171875, |
|
"eval_runtime": 158.123, |
|
"eval_samples_per_second": 1.72, |
|
"eval_steps_per_second": 0.108, |
|
"eval_wer": 10.642644873699851, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 293.65, |
|
"step": 5000, |
|
"total_flos": 1.555167153934817e+20, |
|
"train_loss": 0.006518007612228393, |
|
"train_runtime": 7844.7939, |
|
"train_samples_per_second": 20.396, |
|
"train_steps_per_second": 0.637 |
|
} |
|
], |
|
"max_steps": 5000, |
|
"num_train_epochs": 295, |
|
"total_flos": 1.555167153934817e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|