wav2vec2-large-xls-r-300m-hindi / trainer_state.json
infinitejoy's picture
End of training
aac3ebd
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 14800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.68,
"learning_rate": 3.6375e-06,
"loss": 19.2806,
"step": 100
},
{
"epoch": 1.35,
"learning_rate": 7.3875e-06,
"loss": 10.959,
"step": 200
},
{
"epoch": 2.03,
"learning_rate": 1.1137499999999998e-05,
"loss": 6.9106,
"step": 300
},
{
"epoch": 2.7,
"learning_rate": 1.48875e-05,
"loss": 5.624,
"step": 400
},
{
"epoch": 3.38,
"learning_rate": 1.86375e-05,
"loss": 4.6095,
"step": 500
},
{
"epoch": 3.38,
"eval_loss": 4.5881452560424805,
"eval_runtime": 92.8763,
"eval_samples_per_second": 22.557,
"eval_steps_per_second": 0.711,
"eval_wer": 0.9999453103636861,
"step": 500
},
{
"epoch": 4.05,
"learning_rate": 2.23875e-05,
"loss": 3.9225,
"step": 600
},
{
"epoch": 4.73,
"learning_rate": 2.6137499999999995e-05,
"loss": 3.5503,
"step": 700
},
{
"epoch": 5.41,
"learning_rate": 2.9887499999999998e-05,
"loss": 3.4262,
"step": 800
},
{
"epoch": 6.08,
"learning_rate": 3.36375e-05,
"loss": 3.391,
"step": 900
},
{
"epoch": 6.76,
"learning_rate": 3.7387499999999994e-05,
"loss": 3.3396,
"step": 1000
},
{
"epoch": 6.76,
"eval_loss": 3.3301022052764893,
"eval_runtime": 93.143,
"eval_samples_per_second": 22.492,
"eval_steps_per_second": 0.709,
"eval_wer": 1.0000546896363138,
"step": 1000
},
{
"epoch": 7.43,
"learning_rate": 4.11375e-05,
"loss": 3.2895,
"step": 1100
},
{
"epoch": 8.11,
"learning_rate": 4.48875e-05,
"loss": 3.228,
"step": 1200
},
{
"epoch": 8.78,
"learning_rate": 4.8637499999999996e-05,
"loss": 2.987,
"step": 1300
},
{
"epoch": 9.46,
"learning_rate": 5.23875e-05,
"loss": 2.4352,
"step": 1400
},
{
"epoch": 10.14,
"learning_rate": 5.61375e-05,
"loss": 2.0061,
"step": 1500
},
{
"epoch": 10.14,
"eval_loss": 1.20964777469635,
"eval_runtime": 93.1759,
"eval_samples_per_second": 22.484,
"eval_steps_per_second": 0.708,
"eval_wer": 1.0063439978124145,
"step": 1500
},
{
"epoch": 10.81,
"learning_rate": 5.988749999999999e-05,
"loss": 1.7983,
"step": 1600
},
{
"epoch": 11.49,
"learning_rate": 6.36375e-05,
"loss": 1.6684,
"step": 1700
},
{
"epoch": 12.16,
"learning_rate": 6.738749999999999e-05,
"loss": 1.6307,
"step": 1800
},
{
"epoch": 12.84,
"learning_rate": 7.11375e-05,
"loss": 1.5654,
"step": 1900
},
{
"epoch": 13.51,
"learning_rate": 7.48875e-05,
"loss": 1.523,
"step": 2000
},
{
"epoch": 13.51,
"eval_loss": 0.7836298942565918,
"eval_runtime": 93.1376,
"eval_samples_per_second": 22.494,
"eval_steps_per_second": 0.709,
"eval_wer": 1.0051408258135084,
"step": 2000
},
{
"epoch": 14.19,
"learning_rate": 7.4431640625e-05,
"loss": 1.4937,
"step": 2100
},
{
"epoch": 14.86,
"learning_rate": 7.384570312499999e-05,
"loss": 1.4538,
"step": 2200
},
{
"epoch": 15.54,
"learning_rate": 7.325976562499999e-05,
"loss": 1.4016,
"step": 2300
},
{
"epoch": 16.22,
"learning_rate": 7.2673828125e-05,
"loss": 1.403,
"step": 2400
},
{
"epoch": 16.89,
"learning_rate": 7.2087890625e-05,
"loss": 1.3868,
"step": 2500
},
{
"epoch": 16.89,
"eval_loss": 0.6836622953414917,
"eval_runtime": 92.9915,
"eval_samples_per_second": 22.529,
"eval_steps_per_second": 0.71,
"eval_wer": 1.007984686901832,
"step": 2500
},
{
"epoch": 17.57,
"learning_rate": 7.150195312499999e-05,
"loss": 1.3313,
"step": 2600
},
{
"epoch": 18.24,
"learning_rate": 7.091601562499999e-05,
"loss": 1.2931,
"step": 2700
},
{
"epoch": 18.92,
"learning_rate": 7.0330078125e-05,
"loss": 1.3319,
"step": 2800
},
{
"epoch": 19.59,
"learning_rate": 6.9744140625e-05,
"loss": 1.3133,
"step": 2900
},
{
"epoch": 20.27,
"learning_rate": 6.915820312499999e-05,
"loss": 1.2807,
"step": 3000
},
{
"epoch": 20.27,
"eval_loss": 0.656791090965271,
"eval_runtime": 93.0318,
"eval_samples_per_second": 22.519,
"eval_steps_per_second": 0.709,
"eval_wer": 1.0112113754443532,
"step": 3000
},
{
"epoch": 20.95,
"learning_rate": 6.857226562499999e-05,
"loss": 1.2626,
"step": 3100
},
{
"epoch": 21.62,
"learning_rate": 6.7986328125e-05,
"loss": 1.2535,
"step": 3200
},
{
"epoch": 22.3,
"learning_rate": 6.7400390625e-05,
"loss": 1.2297,
"step": 3300
},
{
"epoch": 22.97,
"learning_rate": 6.681445312499999e-05,
"loss": 1.2074,
"step": 3400
},
{
"epoch": 23.65,
"learning_rate": 6.6234375e-05,
"loss": 1.231,
"step": 3500
},
{
"epoch": 23.65,
"eval_loss": 0.612037718296051,
"eval_runtime": 93.2672,
"eval_samples_per_second": 22.462,
"eval_steps_per_second": 0.708,
"eval_wer": 1.0105004101722723,
"step": 3500
},
{
"epoch": 24.32,
"learning_rate": 6.564843749999999e-05,
"loss": 1.2092,
"step": 3600
},
{
"epoch": 25.0,
"learning_rate": 6.50625e-05,
"loss": 1.1775,
"step": 3700
},
{
"epoch": 25.68,
"learning_rate": 6.44765625e-05,
"loss": 1.1916,
"step": 3800
},
{
"epoch": 26.35,
"learning_rate": 6.3890625e-05,
"loss": 1.1798,
"step": 3900
},
{
"epoch": 27.03,
"learning_rate": 6.330468749999999e-05,
"loss": 1.1673,
"step": 4000
},
{
"epoch": 27.03,
"eval_loss": 0.5971782207489014,
"eval_runtime": 93.4057,
"eval_samples_per_second": 22.429,
"eval_steps_per_second": 0.707,
"eval_wer": 1.0089144107191688,
"step": 4000
},
{
"epoch": 27.7,
"learning_rate": 6.271875e-05,
"loss": 1.1626,
"step": 4100
},
{
"epoch": 28.38,
"learning_rate": 6.21328125e-05,
"loss": 1.162,
"step": 4200
},
{
"epoch": 29.05,
"learning_rate": 6.1546875e-05,
"loss": 1.1471,
"step": 4300
},
{
"epoch": 29.73,
"learning_rate": 6.09609375e-05,
"loss": 1.1203,
"step": 4400
},
{
"epoch": 30.41,
"learning_rate": 6.0375e-05,
"loss": 1.1416,
"step": 4500
},
{
"epoch": 30.41,
"eval_loss": 0.5780259966850281,
"eval_runtime": 92.6708,
"eval_samples_per_second": 22.607,
"eval_steps_per_second": 0.712,
"eval_wer": 1.0131802023516543,
"step": 4500
},
{
"epoch": 31.08,
"learning_rate": 5.97890625e-05,
"loss": 1.1175,
"step": 4600
},
{
"epoch": 31.76,
"learning_rate": 5.9203125e-05,
"loss": 1.1053,
"step": 4700
},
{
"epoch": 32.43,
"learning_rate": 5.86171875e-05,
"loss": 1.107,
"step": 4800
},
{
"epoch": 33.11,
"learning_rate": 5.803125e-05,
"loss": 1.115,
"step": 4900
},
{
"epoch": 33.78,
"learning_rate": 5.74453125e-05,
"loss": 1.0738,
"step": 5000
},
{
"epoch": 33.78,
"eval_loss": 0.580643892288208,
"eval_runtime": 95.0733,
"eval_samples_per_second": 22.036,
"eval_steps_per_second": 0.694,
"eval_wer": 1.0123051681706317,
"step": 5000
},
{
"epoch": 34.46,
"learning_rate": 5.686523437499999e-05,
"loss": 1.0778,
"step": 5100
},
{
"epoch": 35.14,
"learning_rate": 5.6279296874999994e-05,
"loss": 1.0948,
"step": 5200
},
{
"epoch": 35.81,
"learning_rate": 5.569335937499999e-05,
"loss": 1.062,
"step": 5300
},
{
"epoch": 36.49,
"learning_rate": 5.5107421874999994e-05,
"loss": 1.0503,
"step": 5400
},
{
"epoch": 37.16,
"learning_rate": 5.452148437499999e-05,
"loss": 1.0771,
"step": 5500
},
{
"epoch": 37.16,
"eval_loss": 0.5585715174674988,
"eval_runtime": 92.7751,
"eval_samples_per_second": 22.581,
"eval_steps_per_second": 0.711,
"eval_wer": 1.0066721356302981,
"step": 5500
},
{
"epoch": 37.84,
"learning_rate": 5.3935546874999995e-05,
"loss": 1.0368,
"step": 5600
},
{
"epoch": 38.51,
"learning_rate": 5.334960937499999e-05,
"loss": 1.0329,
"step": 5700
},
{
"epoch": 39.19,
"learning_rate": 5.2763671874999995e-05,
"loss": 1.0402,
"step": 5800
},
{
"epoch": 39.86,
"learning_rate": 5.217773437499999e-05,
"loss": 1.0346,
"step": 5900
},
{
"epoch": 40.54,
"learning_rate": 5.1591796874999995e-05,
"loss": 1.0287,
"step": 6000
},
{
"epoch": 40.54,
"eval_loss": 0.5463963747024536,
"eval_runtime": 92.708,
"eval_samples_per_second": 22.598,
"eval_steps_per_second": 0.712,
"eval_wer": 1.0057971014492753,
"step": 6000
},
{
"epoch": 41.22,
"learning_rate": 5.100585937499999e-05,
"loss": 1.0196,
"step": 6100
},
{
"epoch": 41.89,
"learning_rate": 5.0419921874999995e-05,
"loss": 1.0198,
"step": 6200
},
{
"epoch": 42.57,
"learning_rate": 4.983398437499999e-05,
"loss": 0.9864,
"step": 6300
},
{
"epoch": 43.24,
"learning_rate": 4.9248046874999996e-05,
"loss": 0.999,
"step": 6400
},
{
"epoch": 43.92,
"learning_rate": 4.866210937499999e-05,
"loss": 1.0106,
"step": 6500
},
{
"epoch": 43.92,
"eval_loss": 0.5407418608665466,
"eval_runtime": 92.9267,
"eval_samples_per_second": 22.545,
"eval_steps_per_second": 0.71,
"eval_wer": 1.0061799289034727,
"step": 6500
},
{
"epoch": 44.59,
"learning_rate": 4.8076171874999996e-05,
"loss": 0.9816,
"step": 6600
},
{
"epoch": 45.27,
"learning_rate": 4.749023437499999e-05,
"loss": 0.976,
"step": 6700
},
{
"epoch": 45.95,
"learning_rate": 4.6904296874999996e-05,
"loss": 0.9833,
"step": 6800
},
{
"epoch": 46.62,
"learning_rate": 4.631835937499999e-05,
"loss": 0.9905,
"step": 6900
},
{
"epoch": 47.3,
"learning_rate": 4.5732421875e-05,
"loss": 0.9538,
"step": 7000
},
{
"epoch": 47.3,
"eval_loss": 0.5333988070487976,
"eval_runtime": 92.3771,
"eval_samples_per_second": 22.679,
"eval_steps_per_second": 0.714,
"eval_wer": 1.0089144107191688,
"step": 7000
},
{
"epoch": 47.97,
"learning_rate": 4.5146484374999993e-05,
"loss": 0.9578,
"step": 7100
},
{
"epoch": 48.65,
"learning_rate": 4.4560546875e-05,
"loss": 0.9607,
"step": 7200
},
{
"epoch": 49.32,
"learning_rate": 4.3974609374999994e-05,
"loss": 0.9451,
"step": 7300
},
{
"epoch": 50.0,
"learning_rate": 4.3388671875e-05,
"loss": 0.9453,
"step": 7400
},
{
"epoch": 50.68,
"learning_rate": 4.2802734374999994e-05,
"loss": 0.9607,
"step": 7500
},
{
"epoch": 50.68,
"eval_loss": 0.5395269989967346,
"eval_runtime": 92.6715,
"eval_samples_per_second": 22.607,
"eval_steps_per_second": 0.712,
"eval_wer": 1.0110473065354115,
"step": 7500
},
{
"epoch": 51.35,
"learning_rate": 4.2216796875e-05,
"loss": 0.9445,
"step": 7600
},
{
"epoch": 52.03,
"learning_rate": 4.1630859374999994e-05,
"loss": 0.9314,
"step": 7700
},
{
"epoch": 52.7,
"learning_rate": 4.1044921875e-05,
"loss": 0.9166,
"step": 7800
},
{
"epoch": 53.38,
"learning_rate": 4.0458984374999995e-05,
"loss": 0.9264,
"step": 7900
},
{
"epoch": 54.05,
"learning_rate": 3.987304687499999e-05,
"loss": 0.9108,
"step": 8000
},
{
"epoch": 54.05,
"eval_loss": 0.5501919388771057,
"eval_runtime": 92.8418,
"eval_samples_per_second": 22.565,
"eval_steps_per_second": 0.711,
"eval_wer": 1.0137270987147935,
"step": 8000
},
{
"epoch": 54.73,
"learning_rate": 3.9287109374999995e-05,
"loss": 0.9215,
"step": 8100
},
{
"epoch": 55.41,
"learning_rate": 3.870117187499999e-05,
"loss": 0.9326,
"step": 8200
},
{
"epoch": 56.08,
"learning_rate": 3.8115234374999995e-05,
"loss": 0.9199,
"step": 8300
},
{
"epoch": 56.76,
"learning_rate": 3.752929687499999e-05,
"loss": 0.9043,
"step": 8400
},
{
"epoch": 57.43,
"learning_rate": 3.6943359374999996e-05,
"loss": 0.9252,
"step": 8500
},
{
"epoch": 57.43,
"eval_loss": 0.5498473644256592,
"eval_runtime": 92.009,
"eval_samples_per_second": 22.77,
"eval_steps_per_second": 0.717,
"eval_wer": 1.0062346185397868,
"step": 8500
},
{
"epoch": 58.11,
"learning_rate": 3.635742187499999e-05,
"loss": 0.9041,
"step": 8600
},
{
"epoch": 58.78,
"learning_rate": 3.5771484374999996e-05,
"loss": 0.8963,
"step": 8700
},
{
"epoch": 59.46,
"learning_rate": 3.518554687499999e-05,
"loss": 0.901,
"step": 8800
},
{
"epoch": 60.14,
"learning_rate": 3.4599609374999996e-05,
"loss": 0.8947,
"step": 8900
},
{
"epoch": 60.81,
"learning_rate": 3.401367187499999e-05,
"loss": 0.8943,
"step": 9000
},
{
"epoch": 60.81,
"eval_loss": 0.5447660684585571,
"eval_runtime": 92.1905,
"eval_samples_per_second": 22.725,
"eval_steps_per_second": 0.716,
"eval_wer": 1.0158053048947224,
"step": 9000
},
{
"epoch": 61.49,
"learning_rate": 3.3427734375e-05,
"loss": 0.875,
"step": 9100
},
{
"epoch": 62.16,
"learning_rate": 3.284765625e-05,
"loss": 0.8961,
"step": 9200
},
{
"epoch": 62.84,
"learning_rate": 3.2261718749999996e-05,
"loss": 0.8848,
"step": 9300
},
{
"epoch": 63.51,
"learning_rate": 3.167578125e-05,
"loss": 0.8573,
"step": 9400
},
{
"epoch": 64.19,
"learning_rate": 3.1089843749999996e-05,
"loss": 0.8728,
"step": 9500
},
{
"epoch": 64.19,
"eval_loss": 0.5256930589675903,
"eval_runtime": 92.3173,
"eval_samples_per_second": 22.693,
"eval_steps_per_second": 0.715,
"eval_wer": 1.0113207547169811,
"step": 9500
},
{
"epoch": 64.86,
"learning_rate": 3.050390625e-05,
"loss": 0.8819,
"step": 9600
},
{
"epoch": 65.54,
"learning_rate": 2.991796875e-05,
"loss": 0.8324,
"step": 9700
},
{
"epoch": 66.22,
"learning_rate": 2.933203125e-05,
"loss": 0.8461,
"step": 9800
},
{
"epoch": 66.89,
"learning_rate": 2.874609375e-05,
"loss": 0.8512,
"step": 9900
},
{
"epoch": 67.57,
"learning_rate": 2.816015625e-05,
"loss": 0.8577,
"step": 10000
},
{
"epoch": 67.57,
"eval_loss": 0.554978609085083,
"eval_runtime": 92.3777,
"eval_samples_per_second": 22.679,
"eval_steps_per_second": 0.714,
"eval_wer": 1.0177741318020235,
"step": 10000
},
{
"epoch": 68.24,
"learning_rate": 2.7574218749999997e-05,
"loss": 0.845,
"step": 10100
},
{
"epoch": 68.92,
"learning_rate": 2.6988281249999997e-05,
"loss": 0.8622,
"step": 10200
},
{
"epoch": 69.59,
"learning_rate": 2.6402343749999998e-05,
"loss": 0.8534,
"step": 10300
},
{
"epoch": 70.27,
"learning_rate": 2.5816406249999998e-05,
"loss": 0.8328,
"step": 10400
},
{
"epoch": 70.95,
"learning_rate": 2.5230468749999998e-05,
"loss": 0.8332,
"step": 10500
},
{
"epoch": 70.95,
"eval_loss": 0.5607455968856812,
"eval_runtime": 91.9364,
"eval_samples_per_second": 22.787,
"eval_steps_per_second": 0.718,
"eval_wer": 1.0165709598031174,
"step": 10500
},
{
"epoch": 71.62,
"learning_rate": 2.4644531249999998e-05,
"loss": 0.8389,
"step": 10600
},
{
"epoch": 72.3,
"learning_rate": 2.4058593749999998e-05,
"loss": 0.8225,
"step": 10700
},
{
"epoch": 72.97,
"learning_rate": 2.347265625e-05,
"loss": 0.8141,
"step": 10800
},
{
"epoch": 73.65,
"learning_rate": 2.288671875e-05,
"loss": 0.8215,
"step": 10900
},
{
"epoch": 74.32,
"learning_rate": 2.230078125e-05,
"loss": 0.8174,
"step": 11000
},
{
"epoch": 74.32,
"eval_loss": 0.5428524613380432,
"eval_runtime": 94.2641,
"eval_samples_per_second": 22.225,
"eval_steps_per_second": 0.7,
"eval_wer": 1.0145474432595023,
"step": 11000
},
{
"epoch": 75.0,
"learning_rate": 2.171484375e-05,
"loss": 0.7876,
"step": 11100
},
{
"epoch": 75.68,
"learning_rate": 2.112890625e-05,
"loss": 0.8226,
"step": 11200
},
{
"epoch": 76.35,
"learning_rate": 2.054296875e-05,
"loss": 0.8177,
"step": 11300
},
{
"epoch": 77.03,
"learning_rate": 1.995703125e-05,
"loss": 0.7941,
"step": 11400
},
{
"epoch": 77.7,
"learning_rate": 1.937109375e-05,
"loss": 0.8168,
"step": 11500
},
{
"epoch": 77.7,
"eval_loss": 0.5561283230781555,
"eval_runtime": 94.9459,
"eval_samples_per_second": 22.065,
"eval_steps_per_second": 0.695,
"eval_wer": 1.0116488925348646,
"step": 11500
},
{
"epoch": 78.38,
"learning_rate": 1.878515625e-05,
"loss": 0.8095,
"step": 11600
},
{
"epoch": 79.05,
"learning_rate": 1.8199218749999996e-05,
"loss": 0.791,
"step": 11700
},
{
"epoch": 79.73,
"learning_rate": 1.7613281249999997e-05,
"loss": 0.7812,
"step": 11800
},
{
"epoch": 80.41,
"learning_rate": 1.7033203125e-05,
"loss": 0.8103,
"step": 11900
},
{
"epoch": 81.08,
"learning_rate": 1.6447265625e-05,
"loss": 0.7872,
"step": 12000
},
{
"epoch": 81.08,
"eval_loss": 0.5477647185325623,
"eval_runtime": 95.0024,
"eval_samples_per_second": 22.052,
"eval_steps_per_second": 0.695,
"eval_wer": 1.0163522012578616,
"step": 12000
},
{
"epoch": 81.76,
"learning_rate": 1.5861328125e-05,
"loss": 0.7658,
"step": 12100
},
{
"epoch": 82.43,
"learning_rate": 1.5275390625e-05,
"loss": 0.7891,
"step": 12200
},
{
"epoch": 83.11,
"learning_rate": 1.4689453124999998e-05,
"loss": 0.7723,
"step": 12300
},
{
"epoch": 83.78,
"learning_rate": 1.4103515624999998e-05,
"loss": 0.7773,
"step": 12400
},
{
"epoch": 84.46,
"learning_rate": 1.3517578124999998e-05,
"loss": 0.7707,
"step": 12500
},
{
"epoch": 84.46,
"eval_loss": 0.5412248373031616,
"eval_runtime": 95.413,
"eval_samples_per_second": 21.957,
"eval_steps_per_second": 0.692,
"eval_wer": 1.021602406343998,
"step": 12500
},
{
"epoch": 85.14,
"learning_rate": 1.2931640624999999e-05,
"loss": 0.7876,
"step": 12600
},
{
"epoch": 85.81,
"learning_rate": 1.2345703124999999e-05,
"loss": 0.7707,
"step": 12700
},
{
"epoch": 86.49,
"learning_rate": 1.1759765624999999e-05,
"loss": 0.7654,
"step": 12800
},
{
"epoch": 87.16,
"learning_rate": 1.1173828124999999e-05,
"loss": 0.77,
"step": 12900
},
{
"epoch": 87.84,
"learning_rate": 1.0587890625e-05,
"loss": 0.7742,
"step": 13000
},
{
"epoch": 87.84,
"eval_loss": 0.5391495227813721,
"eval_runtime": 95.0784,
"eval_samples_per_second": 22.034,
"eval_steps_per_second": 0.694,
"eval_wer": 1.0206726825266612,
"step": 13000
},
{
"epoch": 88.51,
"learning_rate": 1.0007812499999998e-05,
"loss": 0.7534,
"step": 13100
},
{
"epoch": 89.19,
"learning_rate": 9.421874999999999e-06,
"loss": 0.7765,
"step": 13200
},
{
"epoch": 89.86,
"learning_rate": 8.835937499999999e-06,
"loss": 0.7598,
"step": 13300
},
{
"epoch": 90.54,
"learning_rate": 8.249999999999999e-06,
"loss": 0.7564,
"step": 13400
},
{
"epoch": 91.22,
"learning_rate": 7.664062499999999e-06,
"loss": 0.7594,
"step": 13500
},
{
"epoch": 91.22,
"eval_loss": 0.5379434823989868,
"eval_runtime": 95.2181,
"eval_samples_per_second": 22.002,
"eval_steps_per_second": 0.693,
"eval_wer": 1.020836751435603,
"step": 13500
},
{
"epoch": 91.89,
"learning_rate": 7.078124999999999e-06,
"loss": 0.7739,
"step": 13600
},
{
"epoch": 92.57,
"learning_rate": 6.492187499999999e-06,
"loss": 0.7646,
"step": 13700
},
{
"epoch": 93.24,
"learning_rate": 5.9062499999999996e-06,
"loss": 0.7321,
"step": 13800
},
{
"epoch": 93.92,
"learning_rate": 5.3203125e-06,
"loss": 0.7754,
"step": 13900
},
{
"epoch": 94.59,
"learning_rate": 4.734375e-06,
"loss": 0.7678,
"step": 14000
},
{
"epoch": 94.59,
"eval_loss": 0.541484534740448,
"eval_runtime": 92.5358,
"eval_samples_per_second": 22.64,
"eval_steps_per_second": 0.713,
"eval_wer": 1.0197976483456386,
"step": 14000
},
{
"epoch": 95.27,
"learning_rate": 4.1484375e-06,
"loss": 0.7499,
"step": 14100
},
{
"epoch": 95.95,
"learning_rate": 3.5624999999999998e-06,
"loss": 0.7511,
"step": 14200
},
{
"epoch": 96.62,
"learning_rate": 2.9765625e-06,
"loss": 0.7652,
"step": 14300
},
{
"epoch": 97.3,
"learning_rate": 2.3906249999999997e-06,
"loss": 0.7596,
"step": 14400
},
{
"epoch": 97.97,
"learning_rate": 1.8046874999999998e-06,
"loss": 0.7502,
"step": 14500
},
{
"epoch": 97.97,
"eval_loss": 0.5409459471702576,
"eval_runtime": 94.0294,
"eval_samples_per_second": 22.28,
"eval_steps_per_second": 0.702,
"eval_wer": 1.0191413727098715,
"step": 14500
},
{
"epoch": 98.65,
"learning_rate": 1.21875e-06,
"loss": 0.7587,
"step": 14600
},
{
"epoch": 99.32,
"learning_rate": 6.328125e-07,
"loss": 0.7614,
"step": 14700
},
{
"epoch": 100.0,
"learning_rate": 4.6874999999999995e-08,
"loss": 0.7354,
"step": 14800
},
{
"epoch": 100.0,
"step": 14800,
"total_flos": 6.011583745907916e+19,
"train_loss": 1.4159005551724821,
"train_runtime": 24218.0743,
"train_samples_per_second": 19.452,
"train_steps_per_second": 0.611
}
],
"max_steps": 14800,
"num_train_epochs": 100,
"total_flos": 6.011583745907916e+19,
"trial_name": null,
"trial_params": null
}