wav2vec2-xlsr-1B-NPSC-NN / trainer_state.json
pere's picture
End of training
9b00ad4
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 50.0,
"global_step": 23250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.22,
"learning_rate": 2.8500000000000002e-06,
"loss": 7.8132,
"step": 100
},
{
"epoch": 0.43,
"learning_rate": 5.82e-06,
"loss": 3.053,
"step": 200
},
{
"epoch": 0.65,
"learning_rate": 8.82e-06,
"loss": 2.9223,
"step": 300
},
{
"epoch": 0.86,
"learning_rate": 1.182e-05,
"loss": 2.6894,
"step": 400
},
{
"epoch": 1.08,
"learning_rate": 1.482e-05,
"loss": 1.6894,
"step": 500
},
{
"epoch": 1.08,
"eval_loss": 1.2422521114349365,
"eval_runtime": 81.2578,
"eval_samples_per_second": 10.177,
"eval_steps_per_second": 1.28,
"eval_wer": 0.8619092627599244,
"step": 500
},
{
"epoch": 1.29,
"learning_rate": 1.782e-05,
"loss": 1.3281,
"step": 600
},
{
"epoch": 1.51,
"learning_rate": 2.082e-05,
"loss": 1.1146,
"step": 700
},
{
"epoch": 1.72,
"learning_rate": 2.3820000000000002e-05,
"loss": 0.9295,
"step": 800
},
{
"epoch": 1.94,
"learning_rate": 2.682e-05,
"loss": 0.8392,
"step": 900
},
{
"epoch": 2.15,
"learning_rate": 2.982e-05,
"loss": 0.7543,
"step": 1000
},
{
"epoch": 2.15,
"eval_loss": 0.5956054329872131,
"eval_runtime": 81.5082,
"eval_samples_per_second": 10.146,
"eval_steps_per_second": 1.276,
"eval_wer": 0.38166351606805293,
"step": 1000
},
{
"epoch": 2.37,
"learning_rate": 3.282e-05,
"loss": 0.7128,
"step": 1100
},
{
"epoch": 2.58,
"learning_rate": 3.582e-05,
"loss": 0.6859,
"step": 1200
},
{
"epoch": 2.8,
"learning_rate": 3.8820000000000004e-05,
"loss": 0.6024,
"step": 1300
},
{
"epoch": 3.01,
"learning_rate": 4.1819999999999996e-05,
"loss": 0.6365,
"step": 1400
},
{
"epoch": 3.23,
"learning_rate": 4.482e-05,
"loss": 0.5481,
"step": 1500
},
{
"epoch": 3.23,
"eval_loss": 0.5042839646339417,
"eval_runtime": 83.6557,
"eval_samples_per_second": 9.886,
"eval_steps_per_second": 1.243,
"eval_wer": 0.32457466918714556,
"step": 1500
},
{
"epoch": 3.44,
"learning_rate": 4.7820000000000006e-05,
"loss": 0.5648,
"step": 1600
},
{
"epoch": 3.66,
"learning_rate": 5.082e-05,
"loss": 0.555,
"step": 1700
},
{
"epoch": 3.87,
"learning_rate": 5.382e-05,
"loss": 0.5723,
"step": 1800
},
{
"epoch": 4.09,
"learning_rate": 5.682e-05,
"loss": 0.4716,
"step": 1900
},
{
"epoch": 4.3,
"learning_rate": 5.982e-05,
"loss": 0.4661,
"step": 2000
},
{
"epoch": 4.3,
"eval_loss": 0.48127105832099915,
"eval_runtime": 81.5918,
"eval_samples_per_second": 10.136,
"eval_steps_per_second": 1.275,
"eval_wer": 0.2793005671077505,
"step": 2000
},
{
"epoch": 4.52,
"learning_rate": 5.973458823529412e-05,
"loss": 0.4756,
"step": 2100
},
{
"epoch": 4.73,
"learning_rate": 5.945223529411765e-05,
"loss": 0.4022,
"step": 2200
},
{
"epoch": 4.95,
"learning_rate": 5.9169882352941175e-05,
"loss": 0.4314,
"step": 2300
},
{
"epoch": 5.16,
"learning_rate": 5.8887529411764706e-05,
"loss": 0.4154,
"step": 2400
},
{
"epoch": 5.38,
"learning_rate": 5.8605176470588236e-05,
"loss": 0.3901,
"step": 2500
},
{
"epoch": 5.38,
"eval_loss": 0.4370500147342682,
"eval_runtime": 81.2387,
"eval_samples_per_second": 10.18,
"eval_steps_per_second": 1.28,
"eval_wer": 0.25916824196597354,
"step": 2500
},
{
"epoch": 5.59,
"learning_rate": 5.8322823529411767e-05,
"loss": 0.4014,
"step": 2600
},
{
"epoch": 5.81,
"learning_rate": 5.80404705882353e-05,
"loss": 0.3762,
"step": 2700
},
{
"epoch": 6.02,
"learning_rate": 5.775811764705882e-05,
"loss": 0.3777,
"step": 2800
},
{
"epoch": 6.24,
"learning_rate": 5.747576470588236e-05,
"loss": 0.3342,
"step": 2900
},
{
"epoch": 6.45,
"learning_rate": 5.719341176470588e-05,
"loss": 0.3512,
"step": 3000
},
{
"epoch": 6.45,
"eval_loss": 0.4216073155403137,
"eval_runtime": 81.8015,
"eval_samples_per_second": 10.11,
"eval_steps_per_second": 1.271,
"eval_wer": 0.24584120982986768,
"step": 3000
},
{
"epoch": 6.67,
"learning_rate": 5.691105882352942e-05,
"loss": 0.3204,
"step": 3100
},
{
"epoch": 6.88,
"learning_rate": 5.662870588235294e-05,
"loss": 0.3232,
"step": 3200
},
{
"epoch": 7.1,
"learning_rate": 5.634635294117647e-05,
"loss": 0.2976,
"step": 3300
},
{
"epoch": 7.31,
"learning_rate": 5.6064000000000004e-05,
"loss": 0.296,
"step": 3400
},
{
"epoch": 7.53,
"learning_rate": 5.578164705882353e-05,
"loss": 0.3016,
"step": 3500
},
{
"epoch": 7.53,
"eval_loss": 0.3813554048538208,
"eval_runtime": 83.1204,
"eval_samples_per_second": 9.949,
"eval_steps_per_second": 1.251,
"eval_wer": 0.22570888468809075,
"step": 3500
},
{
"epoch": 7.74,
"learning_rate": 5.549929411764706e-05,
"loss": 0.2726,
"step": 3600
},
{
"epoch": 7.96,
"learning_rate": 5.521694117647059e-05,
"loss": 0.2953,
"step": 3700
},
{
"epoch": 8.17,
"learning_rate": 5.493458823529412e-05,
"loss": 0.2647,
"step": 3800
},
{
"epoch": 8.39,
"learning_rate": 5.465223529411765e-05,
"loss": 0.2524,
"step": 3900
},
{
"epoch": 8.6,
"learning_rate": 5.436988235294118e-05,
"loss": 0.278,
"step": 4000
},
{
"epoch": 8.6,
"eval_loss": 0.41506877541542053,
"eval_runtime": 81.9008,
"eval_samples_per_second": 10.098,
"eval_steps_per_second": 1.27,
"eval_wer": 0.21446124763705104,
"step": 4000
},
{
"epoch": 8.82,
"learning_rate": 5.4087529411764704e-05,
"loss": 0.2559,
"step": 4100
},
{
"epoch": 9.03,
"learning_rate": 5.380517647058824e-05,
"loss": 0.2669,
"step": 4200
},
{
"epoch": 9.25,
"learning_rate": 5.3522823529411765e-05,
"loss": 0.2189,
"step": 4300
},
{
"epoch": 9.46,
"learning_rate": 5.3240470588235296e-05,
"loss": 0.2321,
"step": 4400
},
{
"epoch": 9.68,
"learning_rate": 5.2958117647058826e-05,
"loss": 0.2435,
"step": 4500
},
{
"epoch": 9.68,
"eval_loss": 0.48163077235221863,
"eval_runtime": 80.9004,
"eval_samples_per_second": 10.222,
"eval_steps_per_second": 1.286,
"eval_wer": 0.21304347826086956,
"step": 4500
},
{
"epoch": 9.89,
"learning_rate": 5.267576470588235e-05,
"loss": 0.2348,
"step": 4600
},
{
"epoch": 10.11,
"learning_rate": 5.239341176470589e-05,
"loss": 0.2266,
"step": 4700
},
{
"epoch": 10.32,
"learning_rate": 5.211105882352941e-05,
"loss": 0.2345,
"step": 4800
},
{
"epoch": 10.54,
"learning_rate": 5.182870588235294e-05,
"loss": 0.2266,
"step": 4900
},
{
"epoch": 10.75,
"learning_rate": 5.154635294117647e-05,
"loss": 0.2122,
"step": 5000
},
{
"epoch": 10.75,
"eval_loss": 0.4489321708679199,
"eval_runtime": 81.0474,
"eval_samples_per_second": 10.204,
"eval_steps_per_second": 1.283,
"eval_wer": 0.21370510396975426,
"step": 5000
},
{
"epoch": 10.97,
"learning_rate": 5.1264e-05,
"loss": 0.2038,
"step": 5100
},
{
"epoch": 11.18,
"learning_rate": 5.098164705882353e-05,
"loss": 0.1916,
"step": 5200
},
{
"epoch": 11.4,
"learning_rate": 5.069929411764706e-05,
"loss": 0.1896,
"step": 5300
},
{
"epoch": 11.61,
"learning_rate": 5.041694117647059e-05,
"loss": 0.207,
"step": 5400
},
{
"epoch": 11.83,
"learning_rate": 5.013458823529412e-05,
"loss": 0.1949,
"step": 5500
},
{
"epoch": 11.83,
"eval_loss": 0.3977554738521576,
"eval_runtime": 80.708,
"eval_samples_per_second": 10.247,
"eval_steps_per_second": 1.289,
"eval_wer": 0.20633270321361058,
"step": 5500
},
{
"epoch": 12.04,
"learning_rate": 4.985223529411765e-05,
"loss": 0.2177,
"step": 5600
},
{
"epoch": 12.26,
"learning_rate": 4.956988235294118e-05,
"loss": 0.182,
"step": 5700
},
{
"epoch": 12.47,
"learning_rate": 4.928752941176471e-05,
"loss": 0.1809,
"step": 5800
},
{
"epoch": 12.69,
"learning_rate": 4.900517647058823e-05,
"loss": 0.1876,
"step": 5900
},
{
"epoch": 12.9,
"learning_rate": 4.872282352941177e-05,
"loss": 0.1929,
"step": 6000
},
{
"epoch": 12.9,
"eval_loss": 0.38229823112487793,
"eval_runtime": 82.0352,
"eval_samples_per_second": 10.081,
"eval_steps_per_second": 1.268,
"eval_wer": 0.20264650283553876,
"step": 6000
},
{
"epoch": 13.12,
"learning_rate": 4.8440470588235294e-05,
"loss": 0.1952,
"step": 6100
},
{
"epoch": 13.33,
"learning_rate": 4.8158117647058825e-05,
"loss": 0.1739,
"step": 6200
},
{
"epoch": 13.55,
"learning_rate": 4.7875764705882355e-05,
"loss": 0.1748,
"step": 6300
},
{
"epoch": 13.76,
"learning_rate": 4.759341176470588e-05,
"loss": 0.1625,
"step": 6400
},
{
"epoch": 13.98,
"learning_rate": 4.7311058823529416e-05,
"loss": 0.1757,
"step": 6500
},
{
"epoch": 13.98,
"eval_loss": 0.3409084379673004,
"eval_runtime": 80.5311,
"eval_samples_per_second": 10.269,
"eval_steps_per_second": 1.291,
"eval_wer": 0.19650283553875236,
"step": 6500
},
{
"epoch": 14.19,
"learning_rate": 4.702870588235294e-05,
"loss": 0.1422,
"step": 6600
},
{
"epoch": 14.41,
"learning_rate": 4.674635294117647e-05,
"loss": 0.1571,
"step": 6700
},
{
"epoch": 14.62,
"learning_rate": 4.6464e-05,
"loss": 0.1565,
"step": 6800
},
{
"epoch": 14.84,
"learning_rate": 4.618164705882353e-05,
"loss": 0.1717,
"step": 6900
},
{
"epoch": 15.05,
"learning_rate": 4.589929411764706e-05,
"loss": 0.1771,
"step": 7000
},
{
"epoch": 15.05,
"eval_loss": 0.3844490051269531,
"eval_runtime": 81.332,
"eval_samples_per_second": 10.168,
"eval_steps_per_second": 1.279,
"eval_wer": 0.19357277882797733,
"step": 7000
},
{
"epoch": 15.27,
"learning_rate": 4.561694117647059e-05,
"loss": 0.1408,
"step": 7100
},
{
"epoch": 15.48,
"learning_rate": 4.5334588235294116e-05,
"loss": 0.1512,
"step": 7200
},
{
"epoch": 15.7,
"learning_rate": 4.505223529411765e-05,
"loss": 0.1505,
"step": 7300
},
{
"epoch": 15.91,
"learning_rate": 4.476988235294118e-05,
"loss": 0.1436,
"step": 7400
},
{
"epoch": 16.13,
"learning_rate": 4.448752941176471e-05,
"loss": 0.1452,
"step": 7500
},
{
"epoch": 16.13,
"eval_loss": 0.37490135431289673,
"eval_runtime": 81.4843,
"eval_samples_per_second": 10.149,
"eval_steps_per_second": 1.276,
"eval_wer": 0.1899810964083176,
"step": 7500
},
{
"epoch": 16.34,
"learning_rate": 4.420517647058824e-05,
"loss": 0.1462,
"step": 7600
},
{
"epoch": 16.56,
"learning_rate": 4.392282352941176e-05,
"loss": 0.1425,
"step": 7700
},
{
"epoch": 16.77,
"learning_rate": 4.36404705882353e-05,
"loss": 0.144,
"step": 7800
},
{
"epoch": 16.99,
"learning_rate": 4.335811764705882e-05,
"loss": 0.1622,
"step": 7900
},
{
"epoch": 17.2,
"learning_rate": 4.3075764705882354e-05,
"loss": 0.1341,
"step": 8000
},
{
"epoch": 17.2,
"eval_loss": 0.44071856141090393,
"eval_runtime": 82.1207,
"eval_samples_per_second": 10.071,
"eval_steps_per_second": 1.266,
"eval_wer": 0.20264650283553876,
"step": 8000
},
{
"epoch": 17.42,
"learning_rate": 4.2793411764705884e-05,
"loss": 0.1151,
"step": 8100
},
{
"epoch": 17.63,
"learning_rate": 4.251105882352941e-05,
"loss": 0.1477,
"step": 8200
},
{
"epoch": 17.85,
"learning_rate": 4.2228705882352945e-05,
"loss": 0.1377,
"step": 8300
},
{
"epoch": 18.06,
"learning_rate": 4.194635294117647e-05,
"loss": 0.129,
"step": 8400
},
{
"epoch": 18.28,
"learning_rate": 4.1664e-05,
"loss": 0.13,
"step": 8500
},
{
"epoch": 18.28,
"eval_loss": 0.42531654238700867,
"eval_runtime": 82.3744,
"eval_samples_per_second": 10.04,
"eval_steps_per_second": 1.263,
"eval_wer": 0.1882797731568998,
"step": 8500
},
{
"epoch": 18.49,
"learning_rate": 4.138164705882353e-05,
"loss": 0.1353,
"step": 8600
},
{
"epoch": 18.71,
"learning_rate": 4.109929411764706e-05,
"loss": 0.1216,
"step": 8700
},
{
"epoch": 18.92,
"learning_rate": 4.081694117647059e-05,
"loss": 0.1301,
"step": 8800
},
{
"epoch": 19.14,
"learning_rate": 4.053458823529412e-05,
"loss": 0.1195,
"step": 8900
},
{
"epoch": 19.35,
"learning_rate": 4.0252235294117645e-05,
"loss": 0.1183,
"step": 9000
},
{
"epoch": 19.35,
"eval_loss": 0.43106988072395325,
"eval_runtime": 80.8241,
"eval_samples_per_second": 10.232,
"eval_steps_per_second": 1.287,
"eval_wer": 0.18799621928166352,
"step": 9000
},
{
"epoch": 19.57,
"learning_rate": 3.996988235294118e-05,
"loss": 0.1147,
"step": 9100
},
{
"epoch": 19.78,
"learning_rate": 3.9687529411764706e-05,
"loss": 0.1178,
"step": 9200
},
{
"epoch": 20.0,
"learning_rate": 3.940517647058823e-05,
"loss": 0.1177,
"step": 9300
},
{
"epoch": 20.22,
"learning_rate": 3.912282352941177e-05,
"loss": 0.1142,
"step": 9400
},
{
"epoch": 20.43,
"learning_rate": 3.884047058823529e-05,
"loss": 0.118,
"step": 9500
},
{
"epoch": 20.43,
"eval_loss": 0.4430650472640991,
"eval_runtime": 81.2185,
"eval_samples_per_second": 10.182,
"eval_steps_per_second": 1.28,
"eval_wer": 0.18818525519848772,
"step": 9500
},
{
"epoch": 20.65,
"learning_rate": 3.855811764705883e-05,
"loss": 0.1125,
"step": 9600
},
{
"epoch": 20.86,
"learning_rate": 3.827576470588235e-05,
"loss": 0.1231,
"step": 9700
},
{
"epoch": 21.08,
"learning_rate": 3.799341176470588e-05,
"loss": 0.1133,
"step": 9800
},
{
"epoch": 21.29,
"learning_rate": 3.771105882352941e-05,
"loss": 0.1152,
"step": 9900
},
{
"epoch": 21.51,
"learning_rate": 3.7428705882352944e-05,
"loss": 0.1123,
"step": 10000
},
{
"epoch": 21.51,
"eval_loss": 0.4753414988517761,
"eval_runtime": 81.7131,
"eval_samples_per_second": 10.121,
"eval_steps_per_second": 1.273,
"eval_wer": 0.18204158790170133,
"step": 10000
},
{
"epoch": 21.72,
"learning_rate": 3.7146352941176474e-05,
"loss": 0.1186,
"step": 10100
},
{
"epoch": 21.94,
"learning_rate": 3.6864e-05,
"loss": 0.1113,
"step": 10200
},
{
"epoch": 22.15,
"learning_rate": 3.658164705882353e-05,
"loss": 0.1243,
"step": 10300
},
{
"epoch": 22.37,
"learning_rate": 3.629929411764706e-05,
"loss": 0.1056,
"step": 10400
},
{
"epoch": 22.58,
"learning_rate": 3.601694117647059e-05,
"loss": 0.1037,
"step": 10500
},
{
"epoch": 22.58,
"eval_loss": 0.40868785977363586,
"eval_runtime": 80.8904,
"eval_samples_per_second": 10.224,
"eval_steps_per_second": 1.286,
"eval_wer": 0.1833648393194707,
"step": 10500
},
{
"epoch": 22.8,
"learning_rate": 3.573458823529411e-05,
"loss": 0.1055,
"step": 10600
},
{
"epoch": 23.01,
"learning_rate": 3.545223529411765e-05,
"loss": 0.0977,
"step": 10700
},
{
"epoch": 23.23,
"learning_rate": 3.5169882352941174e-05,
"loss": 0.1076,
"step": 10800
},
{
"epoch": 23.44,
"learning_rate": 3.488752941176471e-05,
"loss": 0.1028,
"step": 10900
},
{
"epoch": 23.66,
"learning_rate": 3.4605176470588235e-05,
"loss": 0.1066,
"step": 11000
},
{
"epoch": 23.66,
"eval_loss": 0.415127694606781,
"eval_runtime": 81.0404,
"eval_samples_per_second": 10.205,
"eval_steps_per_second": 1.283,
"eval_wer": 0.18449905482041587,
"step": 11000
},
{
"epoch": 23.87,
"learning_rate": 3.4322823529411766e-05,
"loss": 0.1091,
"step": 11100
},
{
"epoch": 24.09,
"learning_rate": 3.4040470588235296e-05,
"loss": 0.089,
"step": 11200
},
{
"epoch": 24.3,
"learning_rate": 3.375811764705882e-05,
"loss": 0.1014,
"step": 11300
},
{
"epoch": 24.52,
"learning_rate": 3.347576470588236e-05,
"loss": 0.0864,
"step": 11400
},
{
"epoch": 24.73,
"learning_rate": 3.319341176470588e-05,
"loss": 0.0977,
"step": 11500
},
{
"epoch": 24.73,
"eval_loss": 0.43674391508102417,
"eval_runtime": 81.0756,
"eval_samples_per_second": 10.2,
"eval_steps_per_second": 1.283,
"eval_wer": 0.1782608695652174,
"step": 11500
},
{
"epoch": 24.95,
"learning_rate": 3.291105882352941e-05,
"loss": 0.0989,
"step": 11600
},
{
"epoch": 25.16,
"learning_rate": 3.262870588235294e-05,
"loss": 0.0977,
"step": 11700
},
{
"epoch": 25.38,
"learning_rate": 3.234635294117647e-05,
"loss": 0.0869,
"step": 11800
},
{
"epoch": 25.59,
"learning_rate": 3.2064e-05,
"loss": 0.0851,
"step": 11900
},
{
"epoch": 25.81,
"learning_rate": 3.1781647058823534e-05,
"loss": 0.0968,
"step": 12000
},
{
"epoch": 25.81,
"eval_loss": 0.4236660897731781,
"eval_runtime": 80.9504,
"eval_samples_per_second": 10.216,
"eval_steps_per_second": 1.285,
"eval_wer": 0.17561436672967864,
"step": 12000
},
{
"epoch": 26.02,
"learning_rate": 3.149929411764706e-05,
"loss": 0.0866,
"step": 12100
},
{
"epoch": 26.24,
"learning_rate": 3.121694117647059e-05,
"loss": 0.0774,
"step": 12200
},
{
"epoch": 26.45,
"learning_rate": 3.093458823529412e-05,
"loss": 0.0974,
"step": 12300
},
{
"epoch": 26.67,
"learning_rate": 3.065223529411764e-05,
"loss": 0.0807,
"step": 12400
},
{
"epoch": 26.88,
"learning_rate": 3.0372705882352942e-05,
"loss": 0.0835,
"step": 12500
},
{
"epoch": 26.88,
"eval_loss": 0.4728855490684509,
"eval_runtime": 81.9776,
"eval_samples_per_second": 10.088,
"eval_steps_per_second": 1.269,
"eval_wer": 0.17807183364839319,
"step": 12500
},
{
"epoch": 27.1,
"learning_rate": 3.009035294117647e-05,
"loss": 0.0888,
"step": 12600
},
{
"epoch": 27.31,
"learning_rate": 2.9808000000000003e-05,
"loss": 0.0845,
"step": 12700
},
{
"epoch": 27.53,
"learning_rate": 2.952564705882353e-05,
"loss": 0.0888,
"step": 12800
},
{
"epoch": 27.74,
"learning_rate": 2.9243294117647058e-05,
"loss": 0.0816,
"step": 12900
},
{
"epoch": 27.96,
"learning_rate": 2.8960941176470588e-05,
"loss": 0.0919,
"step": 13000
},
{
"epoch": 27.96,
"eval_loss": 0.4152912497520447,
"eval_runtime": 81.9103,
"eval_samples_per_second": 10.096,
"eval_steps_per_second": 1.27,
"eval_wer": 0.17013232514177692,
"step": 13000
},
{
"epoch": 28.17,
"learning_rate": 2.867858823529412e-05,
"loss": 0.0886,
"step": 13100
},
{
"epoch": 28.39,
"learning_rate": 2.8396235294117646e-05,
"loss": 0.0809,
"step": 13200
},
{
"epoch": 28.6,
"learning_rate": 2.8113882352941176e-05,
"loss": 0.0786,
"step": 13300
},
{
"epoch": 28.82,
"learning_rate": 2.7831529411764707e-05,
"loss": 0.0766,
"step": 13400
},
{
"epoch": 29.03,
"learning_rate": 2.7552e-05,
"loss": 0.0677,
"step": 13500
},
{
"epoch": 29.03,
"eval_loss": 0.431657612323761,
"eval_runtime": 81.4905,
"eval_samples_per_second": 10.148,
"eval_steps_per_second": 1.276,
"eval_wer": 0.16928166351606805,
"step": 13500
},
{
"epoch": 29.25,
"learning_rate": 2.726964705882353e-05,
"loss": 0.0757,
"step": 13600
},
{
"epoch": 29.46,
"learning_rate": 2.698729411764706e-05,
"loss": 0.0719,
"step": 13700
},
{
"epoch": 29.68,
"learning_rate": 2.670494117647059e-05,
"loss": 0.0772,
"step": 13800
},
{
"epoch": 29.89,
"learning_rate": 2.642258823529412e-05,
"loss": 0.0762,
"step": 13900
},
{
"epoch": 30.11,
"learning_rate": 2.614023529411765e-05,
"loss": 0.0726,
"step": 14000
},
{
"epoch": 30.11,
"eval_loss": 0.43801796436309814,
"eval_runtime": 83.6487,
"eval_samples_per_second": 9.887,
"eval_steps_per_second": 1.243,
"eval_wer": 0.17362948960302457,
"step": 14000
},
{
"epoch": 30.32,
"learning_rate": 2.5857882352941176e-05,
"loss": 0.0654,
"step": 14100
},
{
"epoch": 30.54,
"learning_rate": 2.5575529411764707e-05,
"loss": 0.0691,
"step": 14200
},
{
"epoch": 30.75,
"learning_rate": 2.5293176470588234e-05,
"loss": 0.0677,
"step": 14300
},
{
"epoch": 30.97,
"learning_rate": 2.5010823529411764e-05,
"loss": 0.0668,
"step": 14400
},
{
"epoch": 31.18,
"learning_rate": 2.4728470588235295e-05,
"loss": 0.066,
"step": 14500
},
{
"epoch": 31.18,
"eval_loss": 0.4384245276451111,
"eval_runtime": 81.1759,
"eval_samples_per_second": 10.188,
"eval_steps_per_second": 1.281,
"eval_wer": 0.16814744801512288,
"step": 14500
},
{
"epoch": 31.4,
"learning_rate": 2.4446117647058825e-05,
"loss": 0.0663,
"step": 14600
},
{
"epoch": 31.61,
"learning_rate": 2.4163764705882352e-05,
"loss": 0.0688,
"step": 14700
},
{
"epoch": 31.83,
"learning_rate": 2.3881411764705883e-05,
"loss": 0.0677,
"step": 14800
},
{
"epoch": 32.04,
"learning_rate": 2.3599058823529414e-05,
"loss": 0.0718,
"step": 14900
},
{
"epoch": 32.26,
"learning_rate": 2.3316705882352944e-05,
"loss": 0.0713,
"step": 15000
},
{
"epoch": 32.26,
"eval_loss": 0.4215048551559448,
"eval_runtime": 80.5465,
"eval_samples_per_second": 10.267,
"eval_steps_per_second": 1.291,
"eval_wer": 0.16294896030245748,
"step": 15000
},
{
"epoch": 32.47,
"learning_rate": 2.303435294117647e-05,
"loss": 0.065,
"step": 15100
},
{
"epoch": 32.69,
"learning_rate": 2.2752e-05,
"loss": 0.0631,
"step": 15200
},
{
"epoch": 32.9,
"learning_rate": 2.246964705882353e-05,
"loss": 0.0616,
"step": 15300
},
{
"epoch": 33.12,
"learning_rate": 2.218729411764706e-05,
"loss": 0.0812,
"step": 15400
},
{
"epoch": 33.33,
"learning_rate": 2.190494117647059e-05,
"loss": 0.0605,
"step": 15500
},
{
"epoch": 33.33,
"eval_loss": 0.457427054643631,
"eval_runtime": 80.6078,
"eval_samples_per_second": 10.26,
"eval_steps_per_second": 1.29,
"eval_wer": 0.1713610586011342,
"step": 15500
},
{
"epoch": 33.55,
"learning_rate": 2.1622588235294117e-05,
"loss": 0.0566,
"step": 15600
},
{
"epoch": 33.76,
"learning_rate": 2.1340235294117648e-05,
"loss": 0.064,
"step": 15700
},
{
"epoch": 33.98,
"learning_rate": 2.1057882352941178e-05,
"loss": 0.0591,
"step": 15800
},
{
"epoch": 34.19,
"learning_rate": 2.077552941176471e-05,
"loss": 0.0581,
"step": 15900
},
{
"epoch": 34.41,
"learning_rate": 2.0493176470588236e-05,
"loss": 0.0632,
"step": 16000
},
{
"epoch": 34.41,
"eval_loss": 0.43431583046913147,
"eval_runtime": 81.6162,
"eval_samples_per_second": 10.133,
"eval_steps_per_second": 1.274,
"eval_wer": 0.16417769376181474,
"step": 16000
},
{
"epoch": 34.62,
"learning_rate": 2.0210823529411763e-05,
"loss": 0.0578,
"step": 16100
},
{
"epoch": 34.84,
"learning_rate": 1.9928470588235293e-05,
"loss": 0.0616,
"step": 16200
},
{
"epoch": 35.05,
"learning_rate": 1.9648941176470586e-05,
"loss": 0.0516,
"step": 16300
},
{
"epoch": 35.27,
"learning_rate": 1.9366588235294117e-05,
"loss": 0.0533,
"step": 16400
},
{
"epoch": 35.48,
"learning_rate": 1.9084235294117647e-05,
"loss": 0.0567,
"step": 16500
},
{
"epoch": 35.48,
"eval_loss": 0.42308300733566284,
"eval_runtime": 82.0264,
"eval_samples_per_second": 10.082,
"eval_steps_per_second": 1.268,
"eval_wer": 0.16011342155009453,
"step": 16500
},
{
"epoch": 35.7,
"learning_rate": 1.8801882352941178e-05,
"loss": 0.0562,
"step": 16600
},
{
"epoch": 35.91,
"learning_rate": 1.8519529411764705e-05,
"loss": 0.0554,
"step": 16700
},
{
"epoch": 36.13,
"learning_rate": 1.8237176470588236e-05,
"loss": 0.0564,
"step": 16800
},
{
"epoch": 36.34,
"learning_rate": 1.7954823529411766e-05,
"loss": 0.0617,
"step": 16900
},
{
"epoch": 36.56,
"learning_rate": 1.7672470588235297e-05,
"loss": 0.0556,
"step": 17000
},
{
"epoch": 36.56,
"eval_loss": 0.4404306411743164,
"eval_runtime": 81.4874,
"eval_samples_per_second": 10.149,
"eval_steps_per_second": 1.276,
"eval_wer": 0.1667296786389414,
"step": 17000
},
{
"epoch": 36.77,
"learning_rate": 1.7390117647058824e-05,
"loss": 0.0524,
"step": 17100
},
{
"epoch": 36.99,
"learning_rate": 1.7107764705882354e-05,
"loss": 0.0602,
"step": 17200
},
{
"epoch": 37.2,
"learning_rate": 1.682541176470588e-05,
"loss": 0.0472,
"step": 17300
},
{
"epoch": 37.42,
"learning_rate": 1.6543058823529412e-05,
"loss": 0.047,
"step": 17400
},
{
"epoch": 37.63,
"learning_rate": 1.6260705882352943e-05,
"loss": 0.0426,
"step": 17500
},
{
"epoch": 37.63,
"eval_loss": 0.4458593428134918,
"eval_runtime": 80.8169,
"eval_samples_per_second": 10.233,
"eval_steps_per_second": 1.287,
"eval_wer": 0.16247637051039698,
"step": 17500
},
{
"epoch": 37.85,
"learning_rate": 1.597835294117647e-05,
"loss": 0.0513,
"step": 17600
},
{
"epoch": 38.06,
"learning_rate": 1.5696e-05,
"loss": 0.056,
"step": 17700
},
{
"epoch": 38.28,
"learning_rate": 1.541364705882353e-05,
"loss": 0.0498,
"step": 17800
},
{
"epoch": 38.49,
"learning_rate": 1.513129411764706e-05,
"loss": 0.0473,
"step": 17900
},
{
"epoch": 38.71,
"learning_rate": 1.4848941176470588e-05,
"loss": 0.0445,
"step": 18000
},
{
"epoch": 38.71,
"eval_loss": 0.4483908712863922,
"eval_runtime": 82.388,
"eval_samples_per_second": 10.038,
"eval_steps_per_second": 1.262,
"eval_wer": 0.16285444234404536,
"step": 18000
},
{
"epoch": 38.92,
"learning_rate": 1.4566588235294117e-05,
"loss": 0.0487,
"step": 18100
},
{
"epoch": 39.14,
"learning_rate": 1.4284235294117648e-05,
"loss": 0.0491,
"step": 18200
},
{
"epoch": 39.35,
"learning_rate": 1.4001882352941177e-05,
"loss": 0.044,
"step": 18300
},
{
"epoch": 39.57,
"learning_rate": 1.3722352941176471e-05,
"loss": 0.0467,
"step": 18400
},
{
"epoch": 39.78,
"learning_rate": 1.344e-05,
"loss": 0.0463,
"step": 18500
},
{
"epoch": 39.78,
"eval_loss": 0.45076683163642883,
"eval_runtime": 81.2197,
"eval_samples_per_second": 10.182,
"eval_steps_per_second": 1.28,
"eval_wer": 0.15964083175803404,
"step": 18500
},
{
"epoch": 40.0,
"learning_rate": 1.315764705882353e-05,
"loss": 0.0464,
"step": 18600
},
{
"epoch": 40.22,
"learning_rate": 1.2875294117647058e-05,
"loss": 0.0395,
"step": 18700
},
{
"epoch": 40.43,
"learning_rate": 1.2592941176470588e-05,
"loss": 0.0421,
"step": 18800
},
{
"epoch": 40.65,
"learning_rate": 1.2310588235294119e-05,
"loss": 0.0463,
"step": 18900
},
{
"epoch": 40.86,
"learning_rate": 1.2028235294117648e-05,
"loss": 0.0448,
"step": 19000
},
{
"epoch": 40.86,
"eval_loss": 0.4395386278629303,
"eval_runtime": 82.035,
"eval_samples_per_second": 10.081,
"eval_steps_per_second": 1.268,
"eval_wer": 0.1604914933837429,
"step": 19000
},
{
"epoch": 41.08,
"learning_rate": 1.1745882352941178e-05,
"loss": 0.0483,
"step": 19100
},
{
"epoch": 41.29,
"learning_rate": 1.1463529411764705e-05,
"loss": 0.0457,
"step": 19200
},
{
"epoch": 41.51,
"learning_rate": 1.1181176470588236e-05,
"loss": 0.0363,
"step": 19300
},
{
"epoch": 41.72,
"learning_rate": 1.0898823529411765e-05,
"loss": 0.0428,
"step": 19400
},
{
"epoch": 41.94,
"learning_rate": 1.0616470588235295e-05,
"loss": 0.0434,
"step": 19500
},
{
"epoch": 41.94,
"eval_loss": 0.4489993453025818,
"eval_runtime": 82.599,
"eval_samples_per_second": 10.012,
"eval_steps_per_second": 1.259,
"eval_wer": 0.16068052930056712,
"step": 19500
},
{
"epoch": 42.15,
"learning_rate": 1.0334117647058824e-05,
"loss": 0.0389,
"step": 19600
},
{
"epoch": 42.37,
"learning_rate": 1.0051764705882353e-05,
"loss": 0.0482,
"step": 19700
},
{
"epoch": 42.58,
"learning_rate": 9.769411764705882e-06,
"loss": 0.0467,
"step": 19800
},
{
"epoch": 42.8,
"learning_rate": 9.487058823529412e-06,
"loss": 0.0379,
"step": 19900
},
{
"epoch": 43.01,
"learning_rate": 9.204705882352941e-06,
"loss": 0.0347,
"step": 20000
},
{
"epoch": 43.01,
"eval_loss": 0.47717225551605225,
"eval_runtime": 81.2937,
"eval_samples_per_second": 10.173,
"eval_steps_per_second": 1.279,
"eval_wer": 0.15822306238185255,
"step": 20000
},
{
"epoch": 43.23,
"learning_rate": 8.922352941176471e-06,
"loss": 0.0327,
"step": 20100
},
{
"epoch": 43.44,
"learning_rate": 8.64e-06,
"loss": 0.0407,
"step": 20200
},
{
"epoch": 43.66,
"learning_rate": 8.357647058823529e-06,
"loss": 0.0368,
"step": 20300
},
{
"epoch": 43.87,
"learning_rate": 8.07529411764706e-06,
"loss": 0.0365,
"step": 20400
},
{
"epoch": 44.09,
"learning_rate": 7.792941176470588e-06,
"loss": 0.0332,
"step": 20500
},
{
"epoch": 44.09,
"eval_loss": 0.4728728234767914,
"eval_runtime": 81.4631,
"eval_samples_per_second": 10.152,
"eval_steps_per_second": 1.277,
"eval_wer": 0.15822306238185255,
"step": 20500
},
{
"epoch": 44.3,
"learning_rate": 7.510588235294118e-06,
"loss": 0.0353,
"step": 20600
},
{
"epoch": 44.52,
"learning_rate": 7.228235294117648e-06,
"loss": 0.039,
"step": 20700
},
{
"epoch": 44.73,
"learning_rate": 6.945882352941177e-06,
"loss": 0.0395,
"step": 20800
},
{
"epoch": 44.95,
"learning_rate": 6.663529411764706e-06,
"loss": 0.0368,
"step": 20900
},
{
"epoch": 45.16,
"learning_rate": 6.381176470588236e-06,
"loss": 0.037,
"step": 21000
},
{
"epoch": 45.16,
"eval_loss": 0.45591220259666443,
"eval_runtime": 83.1729,
"eval_samples_per_second": 9.943,
"eval_steps_per_second": 1.25,
"eval_wer": 0.15727788279773156,
"step": 21000
},
{
"epoch": 45.38,
"learning_rate": 6.098823529411765e-06,
"loss": 0.0328,
"step": 21100
},
{
"epoch": 45.59,
"learning_rate": 5.8164705882352945e-06,
"loss": 0.0541,
"step": 21200
},
{
"epoch": 45.81,
"learning_rate": 5.534117647058823e-06,
"loss": 0.0413,
"step": 21300
},
{
"epoch": 46.02,
"learning_rate": 5.251764705882353e-06,
"loss": 0.0403,
"step": 21400
},
{
"epoch": 46.24,
"learning_rate": 4.969411764705883e-06,
"loss": 0.0328,
"step": 21500
},
{
"epoch": 46.24,
"eval_loss": 0.46641021966934204,
"eval_runtime": 82.6563,
"eval_samples_per_second": 10.005,
"eval_steps_per_second": 1.258,
"eval_wer": 0.1560491493383743,
"step": 21500
},
{
"epoch": 46.45,
"learning_rate": 4.6870588235294115e-06,
"loss": 0.0334,
"step": 21600
},
{
"epoch": 46.67,
"learning_rate": 4.404705882352941e-06,
"loss": 0.0334,
"step": 21700
},
{
"epoch": 46.88,
"learning_rate": 4.122352941176471e-06,
"loss": 0.0324,
"step": 21800
},
{
"epoch": 47.1,
"learning_rate": 3.8400000000000005e-06,
"loss": 0.0337,
"step": 21900
},
{
"epoch": 47.31,
"learning_rate": 3.5576470588235293e-06,
"loss": 0.0366,
"step": 22000
},
{
"epoch": 47.31,
"eval_loss": 0.454330176115036,
"eval_runtime": 81.2727,
"eval_samples_per_second": 10.176,
"eval_steps_per_second": 1.28,
"eval_wer": 0.15434782608695652,
"step": 22000
},
{
"epoch": 47.53,
"learning_rate": 3.275294117647059e-06,
"loss": 0.0332,
"step": 22100
},
{
"epoch": 47.74,
"learning_rate": 2.9929411764705883e-06,
"loss": 0.0295,
"step": 22200
},
{
"epoch": 47.96,
"learning_rate": 2.710588235294118e-06,
"loss": 0.037,
"step": 22300
},
{
"epoch": 48.17,
"learning_rate": 2.428235294117647e-06,
"loss": 0.0314,
"step": 22400
},
{
"epoch": 48.39,
"learning_rate": 2.1458823529411764e-06,
"loss": 0.0377,
"step": 22500
},
{
"epoch": 48.39,
"eval_loss": 0.45068359375,
"eval_runtime": 80.8954,
"eval_samples_per_second": 10.223,
"eval_steps_per_second": 1.286,
"eval_wer": 0.15595463137996218,
"step": 22500
},
{
"epoch": 48.6,
"learning_rate": 1.8635294117647059e-06,
"loss": 0.0284,
"step": 22600
},
{
"epoch": 48.82,
"learning_rate": 1.5811764705882354e-06,
"loss": 0.0326,
"step": 22700
},
{
"epoch": 49.03,
"learning_rate": 1.2988235294117648e-06,
"loss": 0.034,
"step": 22800
},
{
"epoch": 49.25,
"learning_rate": 1.016470588235294e-06,
"loss": 0.0319,
"step": 22900
},
{
"epoch": 49.46,
"learning_rate": 7.341176470588236e-07,
"loss": 0.0331,
"step": 23000
},
{
"epoch": 49.46,
"eval_loss": 0.4567229151725769,
"eval_runtime": 80.8744,
"eval_samples_per_second": 10.226,
"eval_steps_per_second": 1.286,
"eval_wer": 0.15330812854442344,
"step": 23000
},
{
"epoch": 49.68,
"learning_rate": 4.51764705882353e-07,
"loss": 0.0334,
"step": 23100
},
{
"epoch": 49.89,
"learning_rate": 1.6941176470588237e-07,
"loss": 0.027,
"step": 23200
},
{
"epoch": 50.0,
"step": 23250,
"total_flos": 2.4501256105279144e+20,
"train_loss": 0.23119104407936014,
"train_runtime": 64754.7357,
"train_samples_per_second": 5.744,
"train_steps_per_second": 0.359
}
],
"max_steps": 23250,
"num_train_epochs": 50,
"total_flos": 2.4501256105279144e+20,
"trial_name": null,
"trial_params": null
}