wav2vec2-xls-r-1b-uk / trainer_state.json
arampacha's picture
model upd
9ff1856
{
"best_metric": 0.10924588888883591,
"best_model_checkpoint": "./checkpoint-12000",
"epoch": 38.58520900321543,
"global_step": 12000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.32,
"learning_rate": 4.50125e-06,
"loss": 8.0884,
"step": 100
},
{
"epoch": 0.64,
"learning_rate": 8.62625e-06,
"loss": 3.2246,
"step": 200
},
{
"epoch": 0.96,
"learning_rate": 1.2751250000000001e-05,
"loss": 3.1607,
"step": 300
},
{
"epoch": 1.29,
"learning_rate": 1.6876250000000003e-05,
"loss": 2.3964,
"step": 400
},
{
"epoch": 1.61,
"learning_rate": 2.100125e-05,
"loss": 1.7005,
"step": 500
},
{
"epoch": 1.61,
"eval_cer": 0.11636638864978778,
"eval_loss": 0.4082379639148712,
"eval_runtime": 199.3377,
"eval_samples_per_second": 29.106,
"eval_steps_per_second": 0.457,
"eval_wer": 0.5583793477747888,
"step": 500
},
{
"epoch": 1.93,
"learning_rate": 2.512625e-05,
"loss": 1.4874,
"step": 600
},
{
"epoch": 2.25,
"learning_rate": 2.9251250000000002e-05,
"loss": 1.3431,
"step": 700
},
{
"epoch": 2.57,
"learning_rate": 3.3376250000000004e-05,
"loss": 1.2316,
"step": 800
},
{
"epoch": 2.89,
"learning_rate": 3.750125e-05,
"loss": 1.187,
"step": 900
},
{
"epoch": 3.22,
"learning_rate": 4.162625e-05,
"loss": 1.1555,
"step": 1000
},
{
"epoch": 3.22,
"eval_cer": 0.05566026535276483,
"eval_loss": 0.2020130306482315,
"eval_runtime": 199.2116,
"eval_samples_per_second": 29.125,
"eval_steps_per_second": 0.457,
"eval_wer": 0.29534192269573833,
"step": 1000
},
{
"epoch": 3.54,
"learning_rate": 4.575125e-05,
"loss": 1.1286,
"step": 1100
},
{
"epoch": 3.86,
"learning_rate": 4.9876250000000005e-05,
"loss": 1.1143,
"step": 1200
},
{
"epoch": 4.18,
"learning_rate": 5e-05,
"loss": 1.1067,
"step": 1300
},
{
"epoch": 4.5,
"learning_rate": 5e-05,
"loss": 1.0992,
"step": 1400
},
{
"epoch": 4.82,
"learning_rate": 5e-05,
"loss": 1.0927,
"step": 1500
},
{
"epoch": 4.82,
"eval_cer": 0.04799130331542548,
"eval_loss": 0.1707664430141449,
"eval_runtime": 197.8453,
"eval_samples_per_second": 29.326,
"eval_steps_per_second": 0.46,
"eval_wer": 0.25843598093350323,
"step": 1500
},
{
"epoch": 5.14,
"learning_rate": 5e-05,
"loss": 1.0907,
"step": 1600
},
{
"epoch": 5.47,
"learning_rate": 5e-05,
"loss": 1.0765,
"step": 1700
},
{
"epoch": 5.79,
"learning_rate": 5e-05,
"loss": 1.0693,
"step": 1800
},
{
"epoch": 6.11,
"learning_rate": 5e-05,
"loss": 1.0547,
"step": 1900
},
{
"epoch": 6.43,
"learning_rate": 5e-05,
"loss": 1.0707,
"step": 2000
},
{
"epoch": 6.43,
"eval_cer": 0.04497120892820041,
"eval_loss": 0.15630319714546204,
"eval_runtime": 197.159,
"eval_samples_per_second": 29.428,
"eval_steps_per_second": 0.462,
"eval_wer": 0.24054934163952996,
"step": 2000
},
{
"epoch": 6.75,
"learning_rate": 5e-05,
"loss": 1.0647,
"step": 2100
},
{
"epoch": 7.07,
"learning_rate": 5e-05,
"loss": 1.054,
"step": 2200
},
{
"epoch": 7.4,
"learning_rate": 5e-05,
"loss": 1.0478,
"step": 2300
},
{
"epoch": 7.72,
"learning_rate": 5e-05,
"loss": 1.0611,
"step": 2400
},
{
"epoch": 8.04,
"learning_rate": 5e-05,
"loss": 1.0728,
"step": 2500
},
{
"epoch": 8.04,
"eval_cer": 0.04629574663856816,
"eval_loss": 0.16203930974006653,
"eval_runtime": 196.804,
"eval_samples_per_second": 29.481,
"eval_steps_per_second": 0.462,
"eval_wer": 0.2442304969559677,
"step": 2500
},
{
"epoch": 8.36,
"learning_rate": 5e-05,
"loss": 1.0563,
"step": 2600
},
{
"epoch": 8.68,
"learning_rate": 5e-05,
"loss": 1.0404,
"step": 2700
},
{
"epoch": 9.0,
"learning_rate": 5e-05,
"loss": 1.0791,
"step": 2800
},
{
"epoch": 9.32,
"learning_rate": 5e-05,
"loss": 1.0535,
"step": 2900
},
{
"epoch": 9.65,
"learning_rate": 5e-05,
"loss": 1.0268,
"step": 3000
},
{
"epoch": 9.65,
"eval_cer": 0.04575776913715829,
"eval_loss": 0.15875375270843506,
"eval_runtime": 201.4769,
"eval_samples_per_second": 28.797,
"eval_steps_per_second": 0.452,
"eval_wer": 0.2377884751522016,
"step": 3000
},
{
"epoch": 9.97,
"learning_rate": 5e-05,
"loss": 1.0322,
"step": 3100
},
{
"epoch": 10.29,
"learning_rate": 5e-05,
"loss": 1.0208,
"step": 3200
},
{
"epoch": 10.61,
"learning_rate": 5e-05,
"loss": 1.0172,
"step": 3300
},
{
"epoch": 10.93,
"learning_rate": 5e-05,
"loss": 1.019,
"step": 3400
},
{
"epoch": 11.25,
"learning_rate": 5e-05,
"loss": 1.0328,
"step": 3500
},
{
"epoch": 11.25,
"eval_cer": 0.04419206909857232,
"eval_loss": 0.14661966264247894,
"eval_runtime": 196.9894,
"eval_samples_per_second": 29.453,
"eval_steps_per_second": 0.462,
"eval_wer": 0.23516919156165936,
"step": 3500
},
{
"epoch": 11.58,
"learning_rate": 5e-05,
"loss": 1.0153,
"step": 3600
},
{
"epoch": 11.9,
"learning_rate": 5e-05,
"loss": 1.0206,
"step": 3700
},
{
"epoch": 12.22,
"learning_rate": 5e-05,
"loss": 1.0168,
"step": 3800
},
{
"epoch": 12.54,
"learning_rate": 5e-05,
"loss": 1.0269,
"step": 3900
},
{
"epoch": 12.86,
"learning_rate": 5e-05,
"loss": 1.0249,
"step": 4000
},
{
"epoch": 12.86,
"eval_cer": 0.04486361342791843,
"eval_loss": 0.15519459545612335,
"eval_runtime": 197.5966,
"eval_samples_per_second": 29.363,
"eval_steps_per_second": 0.461,
"eval_wer": 0.23413091698522817,
"step": 4000
},
{
"epoch": 13.18,
"learning_rate": 5e-05,
"loss": 1.022,
"step": 4100
},
{
"epoch": 13.5,
"learning_rate": 5e-05,
"loss": 1.0219,
"step": 4200
},
{
"epoch": 13.83,
"learning_rate": 5e-05,
"loss": 1.0203,
"step": 4300
},
{
"epoch": 14.15,
"learning_rate": 5e-05,
"loss": 1.0171,
"step": 4400
},
{
"epoch": 14.47,
"learning_rate": 5e-05,
"loss": 1.016,
"step": 4500
},
{
"epoch": 14.47,
"eval_cer": 0.047286367279095305,
"eval_loss": 0.16016805171966553,
"eval_runtime": 197.4133,
"eval_samples_per_second": 29.39,
"eval_steps_per_second": 0.461,
"eval_wer": 0.2435461796215017,
"step": 4500
},
{
"epoch": 14.79,
"learning_rate": 5e-05,
"loss": 1.0233,
"step": 4600
},
{
"epoch": 15.11,
"learning_rate": 5e-05,
"loss": 1.0139,
"step": 4700
},
{
"epoch": 15.43,
"learning_rate": 5e-05,
"loss": 1.0252,
"step": 4800
},
{
"epoch": 15.76,
"learning_rate": 4.936666666666667e-05,
"loss": 1.0305,
"step": 4900
},
{
"epoch": 16.08,
"learning_rate": 4.870694444444445e-05,
"loss": 1.0164,
"step": 5000
},
{
"epoch": 16.08,
"eval_cer": 0.044392419340476684,
"eval_loss": 0.14910832047462463,
"eval_runtime": 205.8325,
"eval_samples_per_second": 28.188,
"eval_steps_per_second": 0.442,
"eval_wer": 0.23372976544433433,
"step": 5000
},
{
"epoch": 16.4,
"learning_rate": 4.804722222222223e-05,
"loss": 1.0029,
"step": 5100
},
{
"epoch": 16.72,
"learning_rate": 4.73875e-05,
"loss": 0.9924,
"step": 5200
},
{
"epoch": 17.04,
"learning_rate": 4.672777777777778e-05,
"loss": 1.0058,
"step": 5300
},
{
"epoch": 17.36,
"learning_rate": 4.606805555555556e-05,
"loss": 0.996,
"step": 5400
},
{
"epoch": 17.68,
"learning_rate": 4.540833333333334e-05,
"loss": 0.9935,
"step": 5500
},
{
"epoch": 17.68,
"eval_cer": 0.045754058947493396,
"eval_loss": 0.15390604734420776,
"eval_runtime": 206.7044,
"eval_samples_per_second": 28.069,
"eval_steps_per_second": 0.44,
"eval_wer": 0.23729293501345036,
"step": 5500
},
{
"epoch": 18.01,
"learning_rate": 4.4748611111111116e-05,
"loss": 0.9993,
"step": 5600
},
{
"epoch": 18.33,
"learning_rate": 4.408888888888889e-05,
"loss": 0.983,
"step": 5700
},
{
"epoch": 18.65,
"learning_rate": 4.342916666666667e-05,
"loss": 0.9794,
"step": 5800
},
{
"epoch": 18.97,
"learning_rate": 4.2769444444444447e-05,
"loss": 0.9719,
"step": 5900
},
{
"epoch": 19.29,
"learning_rate": 4.2109722222222226e-05,
"loss": 0.9626,
"step": 6000
},
{
"epoch": 19.29,
"eval_cer": 0.04342777002760381,
"eval_loss": 0.1458132266998291,
"eval_runtime": 201.2355,
"eval_samples_per_second": 28.832,
"eval_steps_per_second": 0.452,
"eval_wer": 0.2305441502666478,
"step": 6000
},
{
"epoch": 19.61,
"learning_rate": 4.145e-05,
"loss": 0.9542,
"step": 6100
},
{
"epoch": 19.94,
"learning_rate": 4.079027777777778e-05,
"loss": 0.978,
"step": 6200
},
{
"epoch": 20.26,
"learning_rate": 4.013055555555556e-05,
"loss": 0.9536,
"step": 6300
},
{
"epoch": 20.58,
"learning_rate": 3.9470833333333335e-05,
"loss": 0.9627,
"step": 6400
},
{
"epoch": 20.9,
"learning_rate": 3.8811111111111114e-05,
"loss": 0.9505,
"step": 6500
},
{
"epoch": 20.9,
"eval_cer": 0.04073046214122466,
"eval_loss": 0.13684287667274475,
"eval_runtime": 202.0319,
"eval_samples_per_second": 28.718,
"eval_steps_per_second": 0.45,
"eval_wer": 0.21565434895464627,
"step": 6500
},
{
"epoch": 21.22,
"learning_rate": 3.815138888888889e-05,
"loss": 0.9395,
"step": 6600
},
{
"epoch": 21.54,
"learning_rate": 3.749166666666667e-05,
"loss": 0.9393,
"step": 6700
},
{
"epoch": 21.86,
"learning_rate": 3.6831944444444444e-05,
"loss": 0.9541,
"step": 6800
},
{
"epoch": 22.19,
"learning_rate": 3.6172222222222224e-05,
"loss": 0.9538,
"step": 6900
},
{
"epoch": 22.51,
"learning_rate": 3.55125e-05,
"loss": 0.9389,
"step": 7000
},
{
"epoch": 22.51,
"eval_cer": 0.042626369059986347,
"eval_loss": 0.14371351897716522,
"eval_runtime": 197.7954,
"eval_samples_per_second": 29.333,
"eval_steps_per_second": 0.46,
"eval_wer": 0.22306385388645053,
"step": 7000
},
{
"epoch": 22.83,
"learning_rate": 3.485277777777778e-05,
"loss": 0.9429,
"step": 7100
},
{
"epoch": 23.15,
"learning_rate": 3.419965277777778e-05,
"loss": 0.9407,
"step": 7200
},
{
"epoch": 23.47,
"learning_rate": 3.353993055555556e-05,
"loss": 0.9224,
"step": 7300
},
{
"epoch": 23.79,
"learning_rate": 3.288020833333334e-05,
"loss": 0.9197,
"step": 7400
},
{
"epoch": 24.12,
"learning_rate": 3.2220486111111115e-05,
"loss": 0.9129,
"step": 7500
},
{
"epoch": 24.12,
"eval_cer": 0.039372532723872845,
"eval_loss": 0.13133755326271057,
"eval_runtime": 209.4773,
"eval_samples_per_second": 27.698,
"eval_steps_per_second": 0.434,
"eval_wer": 0.20760772098730473,
"step": 7500
},
{
"epoch": 24.44,
"learning_rate": 3.156076388888889e-05,
"loss": 0.9169,
"step": 7600
},
{
"epoch": 24.76,
"learning_rate": 3.090763888888889e-05,
"loss": 0.9133,
"step": 7700
},
{
"epoch": 25.08,
"learning_rate": 3.024791666666667e-05,
"loss": 0.9068,
"step": 7800
},
{
"epoch": 25.4,
"learning_rate": 2.958819444444445e-05,
"loss": 0.9137,
"step": 7900
},
{
"epoch": 25.72,
"learning_rate": 2.8928472222222224e-05,
"loss": 0.9118,
"step": 8000
},
{
"epoch": 25.72,
"eval_cer": 0.03844869549731382,
"eval_loss": 0.12918178737163544,
"eval_runtime": 197.6149,
"eval_samples_per_second": 29.36,
"eval_steps_per_second": 0.46,
"eval_wer": 0.2040445514181887,
"step": 8000
},
{
"epoch": 26.05,
"learning_rate": 2.826875e-05,
"loss": 0.9057,
"step": 8100
},
{
"epoch": 26.37,
"learning_rate": 2.7609027777777785e-05,
"loss": 0.8956,
"step": 8200
},
{
"epoch": 26.69,
"learning_rate": 2.694930555555556e-05,
"loss": 0.9088,
"step": 8300
},
{
"epoch": 27.01,
"learning_rate": 2.6289583333333333e-05,
"loss": 0.8997,
"step": 8400
},
{
"epoch": 27.33,
"learning_rate": 2.5629861111111116e-05,
"loss": 0.8848,
"step": 8500
},
{
"epoch": 27.33,
"eval_cer": 0.03840788341099997,
"eval_loss": 0.1298777312040329,
"eval_runtime": 197.318,
"eval_samples_per_second": 29.404,
"eval_steps_per_second": 0.461,
"eval_wer": 0.20281749964604276,
"step": 8500
},
{
"epoch": 27.65,
"learning_rate": 2.4970138888888895e-05,
"loss": 0.8926,
"step": 8600
},
{
"epoch": 27.97,
"learning_rate": 2.431041666666667e-05,
"loss": 0.8802,
"step": 8700
},
{
"epoch": 28.3,
"learning_rate": 2.365069444444445e-05,
"loss": 0.8784,
"step": 8800
},
{
"epoch": 28.62,
"learning_rate": 2.2990972222222225e-05,
"loss": 0.8749,
"step": 8900
},
{
"epoch": 28.94,
"learning_rate": 2.2331250000000004e-05,
"loss": 0.8667,
"step": 9000
},
{
"epoch": 28.94,
"eval_cer": 0.03673829806179692,
"eval_loss": 0.12283530086278915,
"eval_runtime": 199.3855,
"eval_samples_per_second": 29.099,
"eval_steps_per_second": 0.456,
"eval_wer": 0.1945113030345934,
"step": 9000
},
{
"epoch": 29.26,
"learning_rate": 2.1671527777777783e-05,
"loss": 0.8628,
"step": 9100
},
{
"epoch": 29.58,
"learning_rate": 2.101180555555556e-05,
"loss": 0.8775,
"step": 9200
},
{
"epoch": 29.9,
"learning_rate": 2.0352083333333338e-05,
"loss": 0.8661,
"step": 9300
},
{
"epoch": 30.23,
"learning_rate": 1.9692361111111114e-05,
"loss": 0.8624,
"step": 9400
},
{
"epoch": 30.55,
"learning_rate": 1.9032638888888893e-05,
"loss": 0.8641,
"step": 9500
},
{
"epoch": 30.55,
"eval_cer": 0.036352438336647766,
"eval_loss": 0.12234856933355331,
"eval_runtime": 202.2537,
"eval_samples_per_second": 28.687,
"eval_steps_per_second": 0.45,
"eval_wer": 0.19385058284959178,
"step": 9500
},
{
"epoch": 30.87,
"learning_rate": 1.837291666666667e-05,
"loss": 0.8637,
"step": 9600
},
{
"epoch": 31.19,
"learning_rate": 1.7713194444444447e-05,
"loss": 0.8608,
"step": 9700
},
{
"epoch": 31.51,
"learning_rate": 1.7053472222222226e-05,
"loss": 0.8556,
"step": 9800
},
{
"epoch": 31.83,
"learning_rate": 1.6393750000000002e-05,
"loss": 0.854,
"step": 9900
},
{
"epoch": 32.15,
"learning_rate": 1.573402777777778e-05,
"loss": 0.8516,
"step": 10000
},
{
"epoch": 32.15,
"eval_cer": 0.03494627645365231,
"eval_loss": 0.11841931194067001,
"eval_runtime": 199.2371,
"eval_samples_per_second": 29.121,
"eval_steps_per_second": 0.457,
"eval_wer": 0.18762093539100477,
"step": 10000
},
{
"epoch": 32.48,
"learning_rate": 1.5074305555555557e-05,
"loss": 0.8433,
"step": 10100
},
{
"epoch": 32.8,
"learning_rate": 1.4414583333333338e-05,
"loss": 0.8507,
"step": 10200
},
{
"epoch": 33.12,
"learning_rate": 1.3754861111111117e-05,
"loss": 0.8419,
"step": 10300
},
{
"epoch": 33.44,
"learning_rate": 1.3095138888888892e-05,
"loss": 0.8344,
"step": 10400
},
{
"epoch": 33.76,
"learning_rate": 1.2435416666666671e-05,
"loss": 0.8379,
"step": 10500
},
{
"epoch": 33.76,
"eval_cer": 0.03375159538155591,
"eval_loss": 0.11372008919715881,
"eval_runtime": 199.4785,
"eval_samples_per_second": 29.086,
"eval_steps_per_second": 0.456,
"eval_wer": 0.18207560526688377,
"step": 10500
},
{
"epoch": 34.08,
"learning_rate": 1.1782291666666672e-05,
"loss": 0.8302,
"step": 10600
},
{
"epoch": 34.41,
"learning_rate": 1.1122569444444448e-05,
"loss": 0.8294,
"step": 10700
},
{
"epoch": 34.73,
"learning_rate": 1.0462847222222227e-05,
"loss": 0.8225,
"step": 10800
},
{
"epoch": 35.05,
"learning_rate": 9.803125000000001e-06,
"loss": 0.8237,
"step": 10900
},
{
"epoch": 35.37,
"learning_rate": 9.143402777777782e-06,
"loss": 0.8235,
"step": 11000
},
{
"epoch": 35.37,
"eval_cer": 0.03308005105220979,
"eval_loss": 0.11269930005073547,
"eval_runtime": 198.8276,
"eval_samples_per_second": 29.181,
"eval_steps_per_second": 0.458,
"eval_wer": 0.1778753126622304,
"step": 11000
},
{
"epoch": 35.69,
"learning_rate": 8.483680555555563e-06,
"loss": 0.8205,
"step": 11100
},
{
"epoch": 36.01,
"learning_rate": 7.823958333333337e-06,
"loss": 0.826,
"step": 11200
},
{
"epoch": 36.33,
"learning_rate": 7.1642361111111165e-06,
"loss": 0.8207,
"step": 11300
},
{
"epoch": 36.66,
"learning_rate": 6.504513888888891e-06,
"loss": 0.8129,
"step": 11400
},
{
"epoch": 36.98,
"learning_rate": 5.844791666666671e-06,
"loss": 0.8112,
"step": 11500
},
{
"epoch": 36.98,
"eval_cer": 0.03268677094773085,
"eval_loss": 0.11033473163843155,
"eval_runtime": 201.8103,
"eval_samples_per_second": 28.75,
"eval_steps_per_second": 0.451,
"eval_wer": 0.17662466374062014,
"step": 11500
},
{
"epoch": 37.3,
"learning_rate": 5.185069444444451e-06,
"loss": 0.805,
"step": 11600
},
{
"epoch": 37.62,
"learning_rate": 4.525347222222226e-06,
"loss": 0.8108,
"step": 11700
},
{
"epoch": 37.94,
"learning_rate": 3.865625000000006e-06,
"loss": 0.8025,
"step": 11800
},
{
"epoch": 38.26,
"learning_rate": 3.2059027777777807e-06,
"loss": 0.8018,
"step": 11900
},
{
"epoch": 38.59,
"learning_rate": 2.5461805555555606e-06,
"loss": 0.8069,
"step": 12000
},
{
"epoch": 38.59,
"eval_cer": 0.032260099136267845,
"eval_loss": 0.10924588888883591,
"eval_runtime": 199.7196,
"eval_samples_per_second": 29.051,
"eval_steps_per_second": 0.456,
"eval_wer": 0.17520883477275945,
"step": 12000
},
{
"epoch": 38.59,
"step": 12000,
"total_flos": 1.0363087195555613e+21,
"train_loss": 1.0786900800069172,
"train_runtime": 133237.4383,
"train_samples_per_second": 11.528,
"train_steps_per_second": 0.09
}
],
"max_steps": 12000,
"num_train_epochs": 39,
"total_flos": 1.0363087195555613e+21,
"trial_name": null,
"trial_params": null
}