wav2vec2-xls-r-1b-uk / trainer_state.json
arampacha's picture
iter 1
f3f904f
raw history blame
No virus
17.6 kB
{
"best_metric": 0.12852737307548523,
"best_model_checkpoint": "./checkpoint-10000",
"epoch": 64.51323175621492,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.64,
"learning_rate": 8.4824e-06,
"loss": 6.9402,
"step": 100
},
{
"epoch": 1.29,
"learning_rate": 1.6402400000000004e-05,
"loss": 3.2608,
"step": 200
},
{
"epoch": 1.93,
"learning_rate": 2.4322400000000003e-05,
"loss": 2.4251,
"step": 300
},
{
"epoch": 2.58,
"learning_rate": 3.22424e-05,
"loss": 1.4757,
"step": 400
},
{
"epoch": 3.22,
"learning_rate": 4.01624e-05,
"loss": 1.2323,
"step": 500
},
{
"epoch": 3.22,
"eval_cer": 0.07965777210531003,
"eval_loss": 0.28158774971961975,
"eval_runtime": 197.0167,
"eval_samples_per_second": 29.449,
"eval_steps_per_second": 0.462,
"eval_wer": 0.41325687856906884,
"step": 500
},
{
"epoch": 3.87,
"learning_rate": 4.80824e-05,
"loss": 1.1608,
"step": 600
},
{
"epoch": 4.51,
"learning_rate": 5.600240000000001e-05,
"loss": 1.1297,
"step": 700
},
{
"epoch": 5.16,
"learning_rate": 6.39224e-05,
"loss": 1.0738,
"step": 800
},
{
"epoch": 5.8,
"learning_rate": 7.184240000000001e-05,
"loss": 1.0863,
"step": 900
},
{
"epoch": 6.45,
"learning_rate": 7.97624e-05,
"loss": 0.9826,
"step": 1000
},
{
"epoch": 6.45,
"eval_cer": 0.05135273515182096,
"eval_loss": 0.19702914357185364,
"eval_runtime": 195.781,
"eval_samples_per_second": 29.635,
"eval_steps_per_second": 0.465,
"eval_wer": 0.26877153239888624,
"step": 1000
},
{
"epoch": 7.1,
"learning_rate": 8e-05,
"loss": 0.9708,
"step": 1100
},
{
"epoch": 7.74,
"learning_rate": 8e-05,
"loss": 0.917,
"step": 1200
},
{
"epoch": 8.38,
"learning_rate": 8e-05,
"loss": 0.888,
"step": 1300
},
{
"epoch": 9.03,
"learning_rate": 8e-05,
"loss": 0.9048,
"step": 1400
},
{
"epoch": 9.67,
"learning_rate": 8e-05,
"loss": 0.8628,
"step": 1500
},
{
"epoch": 9.67,
"eval_cer": 0.04743848505535603,
"eval_loss": 0.16490551829338074,
"eval_runtime": 196.0899,
"eval_samples_per_second": 29.588,
"eval_steps_per_second": 0.464,
"eval_wer": 0.24850158100901412,
"step": 1500
},
{
"epoch": 10.32,
"learning_rate": 8e-05,
"loss": 0.8616,
"step": 1600
},
{
"epoch": 10.96,
"learning_rate": 8e-05,
"loss": 0.8517,
"step": 1700
},
{
"epoch": 11.61,
"learning_rate": 8e-05,
"loss": 0.8455,
"step": 1800
},
{
"epoch": 12.26,
"learning_rate": 8e-05,
"loss": 0.8436,
"step": 1900
},
{
"epoch": 12.9,
"learning_rate": 8e-05,
"loss": 0.8348,
"step": 2000
},
{
"epoch": 12.9,
"eval_cer": 0.046703867501706686,
"eval_loss": 0.16045768558979034,
"eval_runtime": 196.1747,
"eval_samples_per_second": 29.576,
"eval_steps_per_second": 0.464,
"eval_wer": 0.24604747746472228,
"step": 2000
},
{
"epoch": 13.55,
"learning_rate": 8e-05,
"loss": 0.844,
"step": 2100
},
{
"epoch": 14.19,
"learning_rate": 8e-05,
"loss": 0.8369,
"step": 2200
},
{
"epoch": 14.83,
"learning_rate": 8e-05,
"loss": 0.8241,
"step": 2300
},
{
"epoch": 15.48,
"learning_rate": 8e-05,
"loss": 0.8235,
"step": 2400
},
{
"epoch": 16.13,
"learning_rate": 8e-05,
"loss": 0.8186,
"step": 2500
},
{
"epoch": 16.13,
"eval_cer": 0.04690421774361105,
"eval_loss": 0.1608021855354309,
"eval_runtime": 196.173,
"eval_samples_per_second": 29.576,
"eval_steps_per_second": 0.464,
"eval_wer": 0.24692057199490303,
"step": 2500
},
{
"epoch": 16.77,
"learning_rate": 8e-05,
"loss": 0.8355,
"step": 2600
},
{
"epoch": 17.42,
"learning_rate": 8e-05,
"loss": 0.8157,
"step": 2700
},
{
"epoch": 18.06,
"learning_rate": 8e-05,
"loss": 0.8175,
"step": 2800
},
{
"epoch": 18.71,
"learning_rate": 8e-05,
"loss": 0.801,
"step": 2900
},
{
"epoch": 19.35,
"learning_rate": 8e-05,
"loss": 0.8011,
"step": 3000
},
{
"epoch": 19.35,
"eval_cer": 0.046789201863999284,
"eval_loss": 0.1620311141014099,
"eval_runtime": 197.3892,
"eval_samples_per_second": 29.394,
"eval_steps_per_second": 0.461,
"eval_wer": 0.24118646467506724,
"step": 3000
},
{
"epoch": 19.99,
"learning_rate": 8e-05,
"loss": 0.7888,
"step": 3100
},
{
"epoch": 20.64,
"learning_rate": 8e-05,
"loss": 0.8008,
"step": 3200
},
{
"epoch": 21.29,
"learning_rate": 8e-05,
"loss": 0.8197,
"step": 3300
},
{
"epoch": 21.93,
"learning_rate": 8e-05,
"loss": 0.8065,
"step": 3400
},
{
"epoch": 22.58,
"learning_rate": 8e-05,
"loss": 0.807,
"step": 3500
},
{
"epoch": 22.58,
"eval_cer": 0.049805586061559465,
"eval_loss": 0.17369326949119568,
"eval_runtime": 196.0869,
"eval_samples_per_second": 29.589,
"eval_steps_per_second": 0.464,
"eval_wer": 0.252395110670631,
"step": 3500
},
{
"epoch": 23.22,
"learning_rate": 8e-05,
"loss": 0.8045,
"step": 3600
},
{
"epoch": 23.87,
"learning_rate": 8e-05,
"loss": 0.7925,
"step": 3700
},
{
"epoch": 24.51,
"learning_rate": 8e-05,
"loss": 0.8046,
"step": 3800
},
{
"epoch": 25.16,
"learning_rate": 8e-05,
"loss": 0.8102,
"step": 3900
},
{
"epoch": 25.8,
"learning_rate": 8e-05,
"loss": 0.7758,
"step": 4000
},
{
"epoch": 25.8,
"eval_cer": 0.04979074530289988,
"eval_loss": 0.1708839237689972,
"eval_runtime": 196.4196,
"eval_samples_per_second": 29.539,
"eval_steps_per_second": 0.463,
"eval_wer": 0.2535985652933126,
"step": 4000
},
{
"epoch": 26.45,
"learning_rate": 8e-05,
"loss": 0.7968,
"step": 4100
},
{
"epoch": 27.1,
"learning_rate": 8e-05,
"loss": 0.7904,
"step": 4200
},
{
"epoch": 27.74,
"learning_rate": 8e-05,
"loss": 0.8001,
"step": 4300
},
{
"epoch": 28.38,
"learning_rate": 8e-05,
"loss": 0.7869,
"step": 4400
},
{
"epoch": 29.03,
"learning_rate": 8e-05,
"loss": 0.7923,
"step": 4500
},
{
"epoch": 29.03,
"eval_cer": 0.04736799145172301,
"eval_loss": 0.16446976363658905,
"eval_runtime": 196.4759,
"eval_samples_per_second": 29.53,
"eval_steps_per_second": 0.463,
"eval_wer": 0.24356977677096606,
"step": 4500
},
{
"epoch": 29.67,
"learning_rate": 8e-05,
"loss": 0.772,
"step": 4600
},
{
"epoch": 30.32,
"learning_rate": 8e-05,
"loss": 0.7702,
"step": 4700
},
{
"epoch": 30.96,
"learning_rate": 8e-05,
"loss": 0.7797,
"step": 4800
},
{
"epoch": 31.61,
"learning_rate": 8e-05,
"loss": 0.7759,
"step": 4900
},
{
"epoch": 32.26,
"learning_rate": 8e-05,
"loss": 0.7717,
"step": 5000
},
{
"epoch": 32.26,
"eval_cer": 0.052350776171677896,
"eval_loss": 0.1811000257730484,
"eval_runtime": 196.7068,
"eval_samples_per_second": 29.496,
"eval_steps_per_second": 0.463,
"eval_wer": 0.26355656236726605,
"step": 5000
},
{
"epoch": 32.9,
"learning_rate": 7.852560000000001e-05,
"loss": 0.7608,
"step": 5100
},
{
"epoch": 33.55,
"learning_rate": 7.700560000000001e-05,
"loss": 0.763,
"step": 5200
},
{
"epoch": 34.19,
"learning_rate": 7.54856e-05,
"loss": 0.7712,
"step": 5300
},
{
"epoch": 34.83,
"learning_rate": 7.39656e-05,
"loss": 0.7478,
"step": 5400
},
{
"epoch": 35.48,
"learning_rate": 7.24456e-05,
"loss": 0.7447,
"step": 5500
},
{
"epoch": 35.48,
"eval_cer": 0.04679662224332908,
"eval_loss": 0.16353937983512878,
"eval_runtime": 196.1767,
"eval_samples_per_second": 29.575,
"eval_steps_per_second": 0.464,
"eval_wer": 0.2404785501911369,
"step": 5500
},
{
"epoch": 36.13,
"learning_rate": 7.09256e-05,
"loss": 0.7544,
"step": 5600
},
{
"epoch": 36.77,
"learning_rate": 6.94056e-05,
"loss": 0.7438,
"step": 5700
},
{
"epoch": 37.42,
"learning_rate": 6.79008e-05,
"loss": 0.742,
"step": 5800
},
{
"epoch": 38.06,
"learning_rate": 6.638080000000001e-05,
"loss": 0.7441,
"step": 5900
},
{
"epoch": 38.71,
"learning_rate": 6.486080000000001e-05,
"loss": 0.7267,
"step": 6000
},
{
"epoch": 38.71,
"eval_cer": 0.046236383603929836,
"eval_loss": 0.15783575177192688,
"eval_runtime": 197.1092,
"eval_samples_per_second": 29.435,
"eval_steps_per_second": 0.462,
"eval_wer": 0.23542876020576714,
"step": 6000
},
{
"epoch": 39.35,
"learning_rate": 6.33408e-05,
"loss": 0.7112,
"step": 6100
},
{
"epoch": 39.99,
"learning_rate": 6.18208e-05,
"loss": 0.7052,
"step": 6200
},
{
"epoch": 40.64,
"learning_rate": 6.0300800000000004e-05,
"loss": 0.7105,
"step": 6300
},
{
"epoch": 41.29,
"learning_rate": 5.878080000000001e-05,
"loss": 0.7107,
"step": 6400
},
{
"epoch": 41.93,
"learning_rate": 5.72608e-05,
"loss": 0.7046,
"step": 6500
},
{
"epoch": 41.93,
"eval_cer": 0.044429521237125645,
"eval_loss": 0.15552951395511627,
"eval_runtime": 196.7222,
"eval_samples_per_second": 29.493,
"eval_steps_per_second": 0.463,
"eval_wer": 0.22957666713860966,
"step": 6500
},
{
"epoch": 42.58,
"learning_rate": 5.574080000000001e-05,
"loss": 0.7035,
"step": 6600
},
{
"epoch": 43.22,
"learning_rate": 5.422080000000001e-05,
"loss": 0.6967,
"step": 6700
},
{
"epoch": 43.87,
"learning_rate": 5.271600000000001e-05,
"loss": 0.687,
"step": 6800
},
{
"epoch": 44.51,
"learning_rate": 5.1196e-05,
"loss": 0.6875,
"step": 6900
},
{
"epoch": 45.16,
"learning_rate": 4.967600000000001e-05,
"loss": 0.6896,
"step": 7000
},
{
"epoch": 45.16,
"eval_cer": 0.043932355822029624,
"eval_loss": 0.15479956567287445,
"eval_runtime": 196.5953,
"eval_samples_per_second": 29.512,
"eval_steps_per_second": 0.463,
"eval_wer": 0.2271697578932465,
"step": 7000
},
{
"epoch": 45.8,
"learning_rate": 4.8156000000000004e-05,
"loss": 0.6722,
"step": 7100
},
{
"epoch": 46.45,
"learning_rate": 4.663600000000001e-05,
"loss": 0.6816,
"step": 7200
},
{
"epoch": 47.1,
"learning_rate": 4.5116000000000006e-05,
"loss": 0.6658,
"step": 7300
},
{
"epoch": 47.74,
"learning_rate": 4.359600000000001e-05,
"loss": 0.6507,
"step": 7400
},
{
"epoch": 48.38,
"learning_rate": 4.207600000000001e-05,
"loss": 0.6575,
"step": 7500
},
{
"epoch": 48.38,
"eval_cer": 0.03991422041494761,
"eval_loss": 0.14319901168346405,
"eval_runtime": 196.3465,
"eval_samples_per_second": 29.55,
"eval_steps_per_second": 0.463,
"eval_wer": 0.2096370758412384,
"step": 7500
},
{
"epoch": 49.03,
"learning_rate": 4.0556e-05,
"loss": 0.6524,
"step": 7600
},
{
"epoch": 49.67,
"learning_rate": 3.9036000000000004e-05,
"loss": 0.6336,
"step": 7700
},
{
"epoch": 50.32,
"learning_rate": 3.751600000000001e-05,
"loss": 0.6335,
"step": 7800
},
{
"epoch": 50.96,
"learning_rate": 3.5996000000000006e-05,
"loss": 0.6356,
"step": 7900
},
{
"epoch": 51.61,
"learning_rate": 3.447600000000001e-05,
"loss": 0.6264,
"step": 8000
},
{
"epoch": 51.61,
"eval_cer": 0.039750972069692206,
"eval_loss": 0.14660798013210297,
"eval_runtime": 197.4423,
"eval_samples_per_second": 29.386,
"eval_steps_per_second": 0.461,
"eval_wer": 0.20560196328283542,
"step": 8000
},
{
"epoch": 52.26,
"learning_rate": 3.295600000000001e-05,
"loss": 0.6151,
"step": 8100
},
{
"epoch": 52.9,
"learning_rate": 3.1436e-05,
"loss": 0.6138,
"step": 8200
},
{
"epoch": 53.55,
"learning_rate": 2.9916000000000003e-05,
"loss": 0.6066,
"step": 8300
},
{
"epoch": 54.19,
"learning_rate": 2.839600000000001e-05,
"loss": 0.6091,
"step": 8400
},
{
"epoch": 54.83,
"learning_rate": 2.687600000000001e-05,
"loss": 0.589,
"step": 8500
},
{
"epoch": 54.83,
"eval_cer": 0.03713528835594076,
"eval_loss": 0.1351083666086197,
"eval_runtime": 196.1992,
"eval_samples_per_second": 29.572,
"eval_steps_per_second": 0.464,
"eval_wer": 0.19427533153994997,
"step": 8500
},
{
"epoch": 55.48,
"learning_rate": 2.5356000000000006e-05,
"loss": 0.59,
"step": 8600
},
{
"epoch": 56.13,
"learning_rate": 2.3836000000000007e-05,
"loss": 0.5954,
"step": 8700
},
{
"epoch": 56.77,
"learning_rate": 2.2316000000000005e-05,
"loss": 0.5886,
"step": 8800
},
{
"epoch": 57.42,
"learning_rate": 2.0796000000000002e-05,
"loss": 0.5923,
"step": 8900
},
{
"epoch": 58.06,
"learning_rate": 1.927600000000001e-05,
"loss": 0.573,
"step": 9000
},
{
"epoch": 58.06,
"eval_cer": 0.03653794781989255,
"eval_loss": 0.13869842886924744,
"eval_runtime": 197.6459,
"eval_samples_per_second": 29.356,
"eval_steps_per_second": 0.46,
"eval_wer": 0.19342583415923356,
"step": 9000
},
{
"epoch": 58.71,
"learning_rate": 1.7756000000000008e-05,
"loss": 0.5681,
"step": 9100
},
{
"epoch": 59.35,
"learning_rate": 1.623600000000001e-05,
"loss": 0.5749,
"step": 9200
},
{
"epoch": 59.99,
"learning_rate": 1.4716000000000006e-05,
"loss": 0.5649,
"step": 9300
},
{
"epoch": 60.64,
"learning_rate": 1.3196000000000004e-05,
"loss": 0.5649,
"step": 9400
},
{
"epoch": 61.29,
"learning_rate": 1.1676000000000003e-05,
"loss": 0.5537,
"step": 9500
},
{
"epoch": 61.29,
"eval_cer": 0.035276483333828025,
"eval_loss": 0.132797509431839,
"eval_runtime": 197.1347,
"eval_samples_per_second": 29.432,
"eval_steps_per_second": 0.462,
"eval_wer": 0.18825805842654209,
"step": 9500
},
{
"epoch": 61.93,
"learning_rate": 1.015600000000001e-05,
"loss": 0.5551,
"step": 9600
},
{
"epoch": 62.58,
"learning_rate": 8.636000000000008e-06,
"loss": 0.5563,
"step": 9700
},
{
"epoch": 63.22,
"learning_rate": 7.116000000000008e-06,
"loss": 0.5469,
"step": 9800
},
{
"epoch": 63.87,
"learning_rate": 5.596000000000006e-06,
"loss": 0.5431,
"step": 9900
},
{
"epoch": 64.51,
"learning_rate": 4.076000000000005e-06,
"loss": 0.544,
"step": 10000
},
{
"epoch": 64.51,
"eval_cer": 0.034204238520673176,
"eval_loss": 0.12852737307548523,
"eval_runtime": 197.3431,
"eval_samples_per_second": 29.401,
"eval_steps_per_second": 0.461,
"eval_wer": 0.18207560526688377,
"step": 10000
},
{
"epoch": 64.51,
"step": 10000,
"total_flos": 6.715892353150186e+20,
"train_loss": 0.5247637950897217,
"train_runtime": 62085.4852,
"train_samples_per_second": 20.617,
"train_steps_per_second": 0.161
}
],
"max_steps": 10000,
"num_train_epochs": 65,
"total_flos": 6.715892353150186e+20,
"trial_name": null,
"trial_params": null
}