samitizerxu's picture
Added model files
3b472ed
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.4828349944629,
"global_step": 33500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15,
"learning_rate": 3.6375e-06,
"loss": 124.9665,
"step": 100
},
{
"epoch": 0.3,
"learning_rate": 7.3875e-06,
"loss": 92.673,
"step": 200
},
{
"epoch": 0.44,
"learning_rate": 1.1099999999999999e-05,
"loss": 74.8932,
"step": 300
},
{
"epoch": 0.59,
"learning_rate": 1.485e-05,
"loss": 68.0432,
"step": 400
},
{
"epoch": 0.74,
"learning_rate": 1.8599999999999998e-05,
"loss": 60.2112,
"step": 500
},
{
"epoch": 0.74,
"eval_loss": 64.81886291503906,
"eval_runtime": 129.9516,
"eval_samples_per_second": 15.552,
"eval_steps_per_second": 1.947,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 0.89,
"learning_rate": 2.2349999999999998e-05,
"loss": 51.3096,
"step": 600
},
{
"epoch": 1.03,
"learning_rate": 2.6099999999999997e-05,
"loss": 39.1106,
"step": 700
},
{
"epoch": 1.18,
"learning_rate": 2.985e-05,
"loss": 26.6843,
"step": 800
},
{
"epoch": 1.33,
"learning_rate": 3.36e-05,
"loss": 14.7864,
"step": 900
},
{
"epoch": 1.48,
"learning_rate": 3.735e-05,
"loss": 8.1128,
"step": 1000
},
{
"epoch": 1.48,
"eval_loss": 6.899676322937012,
"eval_runtime": 115.5788,
"eval_samples_per_second": 17.486,
"eval_steps_per_second": 2.189,
"eval_wer": 1.0,
"step": 1000
},
{
"epoch": 1.62,
"learning_rate": 4.11e-05,
"loss": 6.6068,
"step": 1100
},
{
"epoch": 1.77,
"learning_rate": 4.484999999999999e-05,
"loss": 6.23,
"step": 1200
},
{
"epoch": 1.92,
"learning_rate": 4.8599999999999995e-05,
"loss": 6.0972,
"step": 1300
},
{
"epoch": 2.07,
"learning_rate": 5.234999999999999e-05,
"loss": 6.0595,
"step": 1400
},
{
"epoch": 2.22,
"learning_rate": 5.6099999999999995e-05,
"loss": 6.0492,
"step": 1500
},
{
"epoch": 2.22,
"eval_loss": 5.967654228210449,
"eval_runtime": 115.432,
"eval_samples_per_second": 17.508,
"eval_steps_per_second": 2.192,
"eval_wer": 1.949529935675408,
"step": 1500
},
{
"epoch": 2.36,
"learning_rate": 5.985e-05,
"loss": 6.0266,
"step": 1600
},
{
"epoch": 2.51,
"learning_rate": 6.359999999999999e-05,
"loss": 5.9902,
"step": 1700
},
{
"epoch": 2.66,
"learning_rate": 6.735e-05,
"loss": 5.9762,
"step": 1800
},
{
"epoch": 2.81,
"learning_rate": 7.11e-05,
"loss": 5.9491,
"step": 1900
},
{
"epoch": 2.95,
"learning_rate": 7.484999999999999e-05,
"loss": 5.9326,
"step": 2000
},
{
"epoch": 2.95,
"eval_loss": 5.884542942047119,
"eval_runtime": 114.597,
"eval_samples_per_second": 17.636,
"eval_steps_per_second": 2.208,
"eval_wer": 1.409203364670955,
"step": 2000
},
{
"epoch": 3.1,
"learning_rate": 7.477394034536891e-05,
"loss": 5.9356,
"step": 2100
},
{
"epoch": 3.25,
"learning_rate": 7.453846153846153e-05,
"loss": 5.8889,
"step": 2200
},
{
"epoch": 3.4,
"learning_rate": 7.430298273155415e-05,
"loss": 5.899,
"step": 2300
},
{
"epoch": 3.54,
"learning_rate": 7.406750392464678e-05,
"loss": 5.8824,
"step": 2400
},
{
"epoch": 3.69,
"learning_rate": 7.38320251177394e-05,
"loss": 5.8763,
"step": 2500
},
{
"epoch": 3.69,
"eval_loss": 5.846009731292725,
"eval_runtime": 117.5393,
"eval_samples_per_second": 17.194,
"eval_steps_per_second": 2.152,
"eval_wer": 1.6125680356259278,
"step": 2500
},
{
"epoch": 3.84,
"learning_rate": 7.359654631083201e-05,
"loss": 5.875,
"step": 2600
},
{
"epoch": 3.99,
"learning_rate": 7.336106750392464e-05,
"loss": 5.8671,
"step": 2700
},
{
"epoch": 4.14,
"learning_rate": 7.312558869701726e-05,
"loss": 5.8591,
"step": 2800
},
{
"epoch": 4.28,
"learning_rate": 7.289010989010989e-05,
"loss": 5.8226,
"step": 2900
},
{
"epoch": 4.43,
"learning_rate": 7.265463108320251e-05,
"loss": 5.7888,
"step": 3000
},
{
"epoch": 4.43,
"eval_loss": 5.75445032119751,
"eval_runtime": 114.1832,
"eval_samples_per_second": 17.7,
"eval_steps_per_second": 2.216,
"eval_wer": 2.2033646709549726,
"step": 3000
},
{
"epoch": 4.58,
"learning_rate": 7.241915227629513e-05,
"loss": 5.8041,
"step": 3100
},
{
"epoch": 4.73,
"learning_rate": 7.218367346938774e-05,
"loss": 5.8013,
"step": 3200
},
{
"epoch": 4.87,
"learning_rate": 7.194819466248037e-05,
"loss": 5.7947,
"step": 3300
},
{
"epoch": 5.02,
"learning_rate": 7.171271585557299e-05,
"loss": 5.7802,
"step": 3400
},
{
"epoch": 5.17,
"learning_rate": 7.147723704866562e-05,
"loss": 5.735,
"step": 3500
},
{
"epoch": 5.17,
"eval_loss": 5.677657604217529,
"eval_runtime": 115.6516,
"eval_samples_per_second": 17.475,
"eval_steps_per_second": 2.188,
"eval_wer": 2.334982681840673,
"step": 3500
},
{
"epoch": 5.32,
"learning_rate": 7.124175824175823e-05,
"loss": 5.7198,
"step": 3600
},
{
"epoch": 5.47,
"learning_rate": 7.100627943485086e-05,
"loss": 5.7092,
"step": 3700
},
{
"epoch": 5.61,
"learning_rate": 7.077080062794347e-05,
"loss": 5.6613,
"step": 3800
},
{
"epoch": 5.76,
"learning_rate": 7.05353218210361e-05,
"loss": 5.6579,
"step": 3900
},
{
"epoch": 5.91,
"learning_rate": 7.029984301412873e-05,
"loss": 5.6861,
"step": 4000
},
{
"epoch": 5.91,
"eval_loss": 5.517865180969238,
"eval_runtime": 115.3653,
"eval_samples_per_second": 17.518,
"eval_steps_per_second": 2.193,
"eval_wer": 2.223156853043048,
"step": 4000
},
{
"epoch": 6.06,
"learning_rate": 7.006436420722135e-05,
"loss": 5.6024,
"step": 4100
},
{
"epoch": 6.2,
"learning_rate": 6.982888540031396e-05,
"loss": 5.5497,
"step": 4200
},
{
"epoch": 6.35,
"learning_rate": 6.959340659340659e-05,
"loss": 5.5257,
"step": 4300
},
{
"epoch": 6.5,
"learning_rate": 6.93579277864992e-05,
"loss": 5.4534,
"step": 4400
},
{
"epoch": 6.65,
"learning_rate": 6.912244897959182e-05,
"loss": 5.381,
"step": 4500
},
{
"epoch": 6.65,
"eval_loss": 5.142032146453857,
"eval_runtime": 117.6237,
"eval_samples_per_second": 17.182,
"eval_steps_per_second": 2.151,
"eval_wer": 2.18159327065809,
"step": 4500
},
{
"epoch": 6.79,
"learning_rate": 6.888697017268445e-05,
"loss": 5.3409,
"step": 4600
},
{
"epoch": 6.94,
"learning_rate": 6.865149136577708e-05,
"loss": 5.1283,
"step": 4700
},
{
"epoch": 7.09,
"learning_rate": 6.841601255886969e-05,
"loss": 4.8788,
"step": 4800
},
{
"epoch": 7.24,
"learning_rate": 6.818053375196232e-05,
"loss": 4.7235,
"step": 4900
},
{
"epoch": 7.39,
"learning_rate": 6.794505494505494e-05,
"loss": 4.625,
"step": 5000
},
{
"epoch": 7.39,
"eval_loss": 3.9019837379455566,
"eval_runtime": 116.0971,
"eval_samples_per_second": 17.408,
"eval_steps_per_second": 2.179,
"eval_wer": 2.0722414646214746,
"step": 5000
},
{
"epoch": 7.53,
"learning_rate": 6.770957613814756e-05,
"loss": 4.5404,
"step": 5100
},
{
"epoch": 7.68,
"learning_rate": 6.747409733124018e-05,
"loss": 4.4307,
"step": 5200
},
{
"epoch": 7.83,
"learning_rate": 6.723861852433281e-05,
"loss": 4.3794,
"step": 5300
},
{
"epoch": 7.98,
"learning_rate": 6.700313971742542e-05,
"loss": 4.2786,
"step": 5400
},
{
"epoch": 8.12,
"learning_rate": 6.676766091051805e-05,
"loss": 4.214,
"step": 5500
},
{
"epoch": 8.12,
"eval_loss": 3.339416027069092,
"eval_runtime": 116.9868,
"eval_samples_per_second": 17.275,
"eval_steps_per_second": 2.163,
"eval_wer": 2.1429985155863434,
"step": 5500
},
{
"epoch": 8.27,
"learning_rate": 6.653218210361068e-05,
"loss": 4.1206,
"step": 5600
},
{
"epoch": 8.42,
"learning_rate": 6.62967032967033e-05,
"loss": 4.081,
"step": 5700
},
{
"epoch": 8.57,
"learning_rate": 6.606122448979591e-05,
"loss": 4.0059,
"step": 5800
},
{
"epoch": 8.71,
"learning_rate": 6.582574568288854e-05,
"loss": 3.9251,
"step": 5900
},
{
"epoch": 8.86,
"learning_rate": 6.559262166405023e-05,
"loss": 3.8992,
"step": 6000
},
{
"epoch": 8.86,
"eval_loss": 2.9084665775299072,
"eval_runtime": 119.0907,
"eval_samples_per_second": 16.97,
"eval_steps_per_second": 2.124,
"eval_wer": 2.153389411182583,
"step": 6000
},
{
"epoch": 9.01,
"learning_rate": 6.535714285714285e-05,
"loss": 3.8494,
"step": 6100
},
{
"epoch": 9.16,
"learning_rate": 6.512166405023547e-05,
"loss": 3.7923,
"step": 6200
},
{
"epoch": 9.31,
"learning_rate": 6.48861852433281e-05,
"loss": 3.7416,
"step": 6300
},
{
"epoch": 9.45,
"learning_rate": 6.465070643642071e-05,
"loss": 3.7095,
"step": 6400
},
{
"epoch": 9.6,
"learning_rate": 6.441522762951334e-05,
"loss": 3.6481,
"step": 6500
},
{
"epoch": 9.6,
"eval_loss": 2.620758295059204,
"eval_runtime": 115.2407,
"eval_samples_per_second": 17.537,
"eval_steps_per_second": 2.195,
"eval_wer": 2.3537852548243445,
"step": 6500
},
{
"epoch": 9.75,
"learning_rate": 6.417974882260596e-05,
"loss": 3.6196,
"step": 6600
},
{
"epoch": 9.9,
"learning_rate": 6.394427001569859e-05,
"loss": 3.5941,
"step": 6700
},
{
"epoch": 10.04,
"learning_rate": 6.37087912087912e-05,
"loss": 3.5608,
"step": 6800
},
{
"epoch": 10.19,
"learning_rate": 6.347331240188383e-05,
"loss": 3.5296,
"step": 6900
},
{
"epoch": 10.34,
"learning_rate": 6.324018838304552e-05,
"loss": 3.4658,
"step": 7000
},
{
"epoch": 10.34,
"eval_loss": 2.3172152042388916,
"eval_runtime": 114.5436,
"eval_samples_per_second": 17.644,
"eval_steps_per_second": 2.209,
"eval_wer": 2.227115289460663,
"step": 7000
},
{
"epoch": 10.49,
"learning_rate": 6.300470957613814e-05,
"loss": 3.3977,
"step": 7100
},
{
"epoch": 10.63,
"learning_rate": 6.276923076923076e-05,
"loss": 3.3987,
"step": 7200
},
{
"epoch": 10.78,
"learning_rate": 6.253375196232339e-05,
"loss": 3.3587,
"step": 7300
},
{
"epoch": 10.93,
"learning_rate": 6.2298273155416e-05,
"loss": 3.2796,
"step": 7400
},
{
"epoch": 11.08,
"learning_rate": 6.206279434850863e-05,
"loss": 3.257,
"step": 7500
},
{
"epoch": 11.08,
"eval_loss": 2.0916049480438232,
"eval_runtime": 113.9408,
"eval_samples_per_second": 17.737,
"eval_steps_per_second": 2.22,
"eval_wer": 2.1350816427511132,
"step": 7500
},
{
"epoch": 11.23,
"learning_rate": 6.182731554160125e-05,
"loss": 3.2476,
"step": 7600
},
{
"epoch": 11.37,
"learning_rate": 6.159183673469388e-05,
"loss": 3.2463,
"step": 7700
},
{
"epoch": 11.52,
"learning_rate": 6.135635792778649e-05,
"loss": 3.2323,
"step": 7800
},
{
"epoch": 11.67,
"learning_rate": 6.112087912087912e-05,
"loss": 3.1674,
"step": 7900
},
{
"epoch": 11.82,
"learning_rate": 6.088540031397174e-05,
"loss": 3.1294,
"step": 8000
},
{
"epoch": 11.82,
"eval_loss": 1.895378828048706,
"eval_runtime": 115.1394,
"eval_samples_per_second": 17.553,
"eval_steps_per_second": 2.197,
"eval_wer": 2.2132607619990106,
"step": 8000
},
{
"epoch": 11.96,
"learning_rate": 6.0649921507064355e-05,
"loss": 3.1262,
"step": 8100
},
{
"epoch": 12.11,
"learning_rate": 6.041444270015698e-05,
"loss": 3.0377,
"step": 8200
},
{
"epoch": 12.26,
"learning_rate": 6.01789638932496e-05,
"loss": 3.0306,
"step": 8300
},
{
"epoch": 12.41,
"learning_rate": 5.994348508634223e-05,
"loss": 3.0425,
"step": 8400
},
{
"epoch": 12.56,
"learning_rate": 5.9710361067503915e-05,
"loss": 3.0266,
"step": 8500
},
{
"epoch": 12.56,
"eval_loss": 1.76727294921875,
"eval_runtime": 114.3494,
"eval_samples_per_second": 17.674,
"eval_steps_per_second": 2.213,
"eval_wer": 2.0895596239485403,
"step": 8500
},
{
"epoch": 12.7,
"learning_rate": 5.9474882260596537e-05,
"loss": 3.0398,
"step": 8600
},
{
"epoch": 12.85,
"learning_rate": 5.9239403453689165e-05,
"loss": 2.9985,
"step": 8700
},
{
"epoch": 13.0,
"learning_rate": 5.900392464678179e-05,
"loss": 2.9969,
"step": 8800
},
{
"epoch": 13.15,
"learning_rate": 5.876844583987441e-05,
"loss": 2.9648,
"step": 8900
},
{
"epoch": 13.29,
"learning_rate": 5.8532967032967024e-05,
"loss": 2.9451,
"step": 9000
},
{
"epoch": 13.29,
"eval_loss": 1.665855884552002,
"eval_runtime": 116.4877,
"eval_samples_per_second": 17.349,
"eval_steps_per_second": 2.172,
"eval_wer": 2.1380504700643246,
"step": 9000
},
{
"epoch": 13.44,
"learning_rate": 5.8297488226059645e-05,
"loss": 2.9573,
"step": 9100
},
{
"epoch": 13.59,
"learning_rate": 5.8062009419152274e-05,
"loss": 2.8819,
"step": 9200
},
{
"epoch": 13.74,
"learning_rate": 5.7826530612244896e-05,
"loss": 2.8901,
"step": 9300
},
{
"epoch": 13.88,
"learning_rate": 5.759105180533752e-05,
"loss": 2.8492,
"step": 9400
},
{
"epoch": 14.03,
"learning_rate": 5.735557299843013e-05,
"loss": 2.8802,
"step": 9500
},
{
"epoch": 14.03,
"eval_loss": 1.5637215375900269,
"eval_runtime": 115.1622,
"eval_samples_per_second": 17.549,
"eval_steps_per_second": 2.197,
"eval_wer": 2.1969322117763483,
"step": 9500
},
{
"epoch": 14.18,
"learning_rate": 5.7120094191522754e-05,
"loss": 2.8346,
"step": 9600
},
{
"epoch": 14.33,
"learning_rate": 5.6884615384615376e-05,
"loss": 2.8355,
"step": 9700
},
{
"epoch": 14.48,
"learning_rate": 5.6649136577708005e-05,
"loss": 2.8124,
"step": 9800
},
{
"epoch": 14.62,
"learning_rate": 5.6413657770800626e-05,
"loss": 2.7879,
"step": 9900
},
{
"epoch": 14.77,
"learning_rate": 5.617817896389324e-05,
"loss": 2.78,
"step": 10000
},
{
"epoch": 14.77,
"eval_loss": 1.4921427965164185,
"eval_runtime": 115.1,
"eval_samples_per_second": 17.559,
"eval_steps_per_second": 2.198,
"eval_wer": 2.2335477486392876,
"step": 10000
},
{
"epoch": 14.92,
"learning_rate": 5.594270015698586e-05,
"loss": 2.775,
"step": 10100
},
{
"epoch": 15.07,
"learning_rate": 5.5707221350078485e-05,
"loss": 2.7478,
"step": 10200
},
{
"epoch": 15.21,
"learning_rate": 5.5471742543171114e-05,
"loss": 2.7224,
"step": 10300
},
{
"epoch": 15.36,
"learning_rate": 5.5236263736263735e-05,
"loss": 2.7506,
"step": 10400
},
{
"epoch": 15.51,
"learning_rate": 5.500078492935635e-05,
"loss": 2.7049,
"step": 10500
},
{
"epoch": 15.51,
"eval_loss": 1.413183569908142,
"eval_runtime": 114.2743,
"eval_samples_per_second": 17.686,
"eval_steps_per_second": 2.214,
"eval_wer": 2.221672439386442,
"step": 10500
},
{
"epoch": 15.66,
"learning_rate": 5.476766091051805e-05,
"loss": 2.7145,
"step": 10600
},
{
"epoch": 15.8,
"learning_rate": 5.453218210361067e-05,
"loss": 2.6892,
"step": 10700
},
{
"epoch": 15.95,
"learning_rate": 5.4296703296703295e-05,
"loss": 2.69,
"step": 10800
},
{
"epoch": 16.1,
"learning_rate": 5.406122448979591e-05,
"loss": 2.623,
"step": 10900
},
{
"epoch": 16.25,
"learning_rate": 5.382574568288853e-05,
"loss": 2.6768,
"step": 11000
},
{
"epoch": 16.25,
"eval_loss": 1.3666878938674927,
"eval_runtime": 119.4402,
"eval_samples_per_second": 16.921,
"eval_steps_per_second": 2.118,
"eval_wer": 2.223156853043048,
"step": 11000
},
{
"epoch": 16.4,
"learning_rate": 5.359262166405023e-05,
"loss": 2.628,
"step": 11100
},
{
"epoch": 16.54,
"learning_rate": 5.3357142857142854e-05,
"loss": 2.6163,
"step": 11200
},
{
"epoch": 16.69,
"learning_rate": 5.312166405023547e-05,
"loss": 2.6193,
"step": 11300
},
{
"epoch": 16.84,
"learning_rate": 5.28861852433281e-05,
"loss": 2.6531,
"step": 11400
},
{
"epoch": 16.99,
"learning_rate": 5.265070643642072e-05,
"loss": 2.6358,
"step": 11500
},
{
"epoch": 16.99,
"eval_loss": 1.311090111732483,
"eval_runtime": 116.2157,
"eval_samples_per_second": 17.39,
"eval_steps_per_second": 2.177,
"eval_wer": 2.128649183572489,
"step": 11500
},
{
"epoch": 17.13,
"learning_rate": 5.241522762951334e-05,
"loss": 2.5748,
"step": 11600
},
{
"epoch": 17.28,
"learning_rate": 5.217974882260596e-05,
"loss": 2.6287,
"step": 11700
},
{
"epoch": 17.43,
"learning_rate": 5.194427001569858e-05,
"loss": 2.5583,
"step": 11800
},
{
"epoch": 17.58,
"learning_rate": 5.17087912087912e-05,
"loss": 2.5547,
"step": 11900
},
{
"epoch": 17.72,
"learning_rate": 5.147331240188383e-05,
"loss": 2.5802,
"step": 12000
},
{
"epoch": 17.72,
"eval_loss": 1.2678567171096802,
"eval_runtime": 116.076,
"eval_samples_per_second": 17.411,
"eval_steps_per_second": 2.18,
"eval_wer": 2.1429985155863434,
"step": 12000
},
{
"epoch": 17.87,
"learning_rate": 5.123783359497645e-05,
"loss": 2.557,
"step": 12100
},
{
"epoch": 18.02,
"learning_rate": 5.100235478806907e-05,
"loss": 2.5771,
"step": 12200
},
{
"epoch": 18.17,
"learning_rate": 5.076687598116169e-05,
"loss": 2.5393,
"step": 12300
},
{
"epoch": 18.32,
"learning_rate": 5.053375196232339e-05,
"loss": 2.5031,
"step": 12400
},
{
"epoch": 18.46,
"learning_rate": 5.029827315541601e-05,
"loss": 2.5012,
"step": 12500
},
{
"epoch": 18.46,
"eval_loss": 1.2365446090698242,
"eval_runtime": 116.0118,
"eval_samples_per_second": 17.421,
"eval_steps_per_second": 2.181,
"eval_wer": 2.115289460663038,
"step": 12500
},
{
"epoch": 18.61,
"learning_rate": 5.006279434850863e-05,
"loss": 2.54,
"step": 12600
},
{
"epoch": 18.76,
"learning_rate": 4.9827315541601246e-05,
"loss": 2.5072,
"step": 12700
},
{
"epoch": 18.91,
"learning_rate": 4.9591836734693875e-05,
"loss": 2.4951,
"step": 12800
},
{
"epoch": 19.05,
"learning_rate": 4.9356357927786497e-05,
"loss": 2.4789,
"step": 12900
},
{
"epoch": 19.2,
"learning_rate": 4.912087912087912e-05,
"loss": 2.458,
"step": 13000
},
{
"epoch": 19.2,
"eval_loss": 1.2117862701416016,
"eval_runtime": 116.2579,
"eval_samples_per_second": 17.384,
"eval_steps_per_second": 2.176,
"eval_wer": 2.1573478476001977,
"step": 13000
},
{
"epoch": 19.35,
"learning_rate": 4.888540031397174e-05,
"loss": 2.4616,
"step": 13100
},
{
"epoch": 19.5,
"learning_rate": 4.8649921507064355e-05,
"loss": 2.4739,
"step": 13200
},
{
"epoch": 19.65,
"learning_rate": 4.8414442700156984e-05,
"loss": 2.4867,
"step": 13300
},
{
"epoch": 19.79,
"learning_rate": 4.8178963893249605e-05,
"loss": 2.4568,
"step": 13400
},
{
"epoch": 19.94,
"learning_rate": 4.794348508634223e-05,
"loss": 2.4433,
"step": 13500
},
{
"epoch": 19.94,
"eval_loss": 1.1991767883300781,
"eval_runtime": 114.5641,
"eval_samples_per_second": 17.641,
"eval_steps_per_second": 2.208,
"eval_wer": 2.1335972290945078,
"step": 13500
},
{
"epoch": 20.09,
"learning_rate": 4.770800627943485e-05,
"loss": 2.4532,
"step": 13600
},
{
"epoch": 20.24,
"learning_rate": 4.7472527472527464e-05,
"loss": 2.3913,
"step": 13700
},
{
"epoch": 20.38,
"learning_rate": 4.7237048665620086e-05,
"loss": 2.421,
"step": 13800
},
{
"epoch": 20.53,
"learning_rate": 4.7001569858712714e-05,
"loss": 2.4526,
"step": 13900
},
{
"epoch": 20.68,
"learning_rate": 4.6766091051805336e-05,
"loss": 2.438,
"step": 14000
},
{
"epoch": 20.68,
"eval_loss": 1.180332064628601,
"eval_runtime": 116.5012,
"eval_samples_per_second": 17.347,
"eval_steps_per_second": 2.172,
"eval_wer": 2.1509153884215735,
"step": 14000
},
{
"epoch": 20.83,
"learning_rate": 4.653061224489796e-05,
"loss": 2.4034,
"step": 14100
},
{
"epoch": 20.97,
"learning_rate": 4.629513343799057e-05,
"loss": 2.4306,
"step": 14200
},
{
"epoch": 21.12,
"learning_rate": 4.6059654631083195e-05,
"loss": 2.4145,
"step": 14300
},
{
"epoch": 21.27,
"learning_rate": 4.582417582417582e-05,
"loss": 2.4677,
"step": 14400
},
{
"epoch": 21.42,
"learning_rate": 4.5588697017268445e-05,
"loss": 2.418,
"step": 14500
},
{
"epoch": 21.42,
"eval_loss": 1.1601430177688599,
"eval_runtime": 114.5652,
"eval_samples_per_second": 17.641,
"eval_steps_per_second": 2.208,
"eval_wer": 2.1232063334982683,
"step": 14500
},
{
"epoch": 21.57,
"learning_rate": 4.535321821036107e-05,
"loss": 2.3967,
"step": 14600
},
{
"epoch": 21.71,
"learning_rate": 4.511773940345368e-05,
"loss": 2.3939,
"step": 14700
},
{
"epoch": 21.86,
"learning_rate": 4.4882260596546304e-05,
"loss": 2.3925,
"step": 14800
},
{
"epoch": 22.01,
"learning_rate": 4.4646781789638925e-05,
"loss": 2.3596,
"step": 14900
},
{
"epoch": 22.16,
"learning_rate": 4.4411302982731554e-05,
"loss": 2.3322,
"step": 15000
},
{
"epoch": 22.16,
"eval_loss": 1.1417704820632935,
"eval_runtime": 116.2111,
"eval_samples_per_second": 17.391,
"eval_steps_per_second": 2.177,
"eval_wer": 2.1929737753587335,
"step": 15000
},
{
"epoch": 22.3,
"learning_rate": 4.4175824175824176e-05,
"loss": 2.3821,
"step": 15100
},
{
"epoch": 22.45,
"learning_rate": 4.394034536891679e-05,
"loss": 2.3435,
"step": 15200
},
{
"epoch": 22.6,
"learning_rate": 4.370486656200941e-05,
"loss": 2.3542,
"step": 15300
},
{
"epoch": 22.75,
"learning_rate": 4.3469387755102034e-05,
"loss": 2.3469,
"step": 15400
},
{
"epoch": 22.89,
"learning_rate": 4.323390894819466e-05,
"loss": 2.3387,
"step": 15500
},
{
"epoch": 22.89,
"eval_loss": 1.1172302961349487,
"eval_runtime": 114.3169,
"eval_samples_per_second": 17.679,
"eval_steps_per_second": 2.213,
"eval_wer": 2.2464126669965365,
"step": 15500
},
{
"epoch": 23.04,
"learning_rate": 4.2998430141287285e-05,
"loss": 2.3688,
"step": 15600
},
{
"epoch": 23.19,
"learning_rate": 4.27629513343799e-05,
"loss": 2.3344,
"step": 15700
},
{
"epoch": 23.34,
"learning_rate": 4.252747252747252e-05,
"loss": 2.3245,
"step": 15800
},
{
"epoch": 23.49,
"learning_rate": 4.229199372056514e-05,
"loss": 2.3523,
"step": 15900
},
{
"epoch": 23.63,
"learning_rate": 4.205651491365777e-05,
"loss": 2.3349,
"step": 16000
},
{
"epoch": 23.63,
"eval_loss": 1.1144375801086426,
"eval_runtime": 116.2412,
"eval_samples_per_second": 17.386,
"eval_steps_per_second": 2.177,
"eval_wer": 2.185551707075705,
"step": 16000
},
{
"epoch": 23.78,
"learning_rate": 4.1821036106750393e-05,
"loss": 2.2847,
"step": 16100
},
{
"epoch": 23.93,
"learning_rate": 4.158555729984301e-05,
"loss": 2.3303,
"step": 16200
},
{
"epoch": 24.08,
"learning_rate": 4.135007849293563e-05,
"loss": 2.2994,
"step": 16300
},
{
"epoch": 24.22,
"learning_rate": 4.111459968602825e-05,
"loss": 2.2887,
"step": 16400
},
{
"epoch": 24.37,
"learning_rate": 4.0879120879120874e-05,
"loss": 2.291,
"step": 16500
},
{
"epoch": 24.37,
"eval_loss": 1.1018128395080566,
"eval_runtime": 114.9042,
"eval_samples_per_second": 17.589,
"eval_steps_per_second": 2.202,
"eval_wer": 2.1929737753587335,
"step": 16500
},
{
"epoch": 24.52,
"learning_rate": 4.06436420722135e-05,
"loss": 2.2888,
"step": 16600
},
{
"epoch": 24.67,
"learning_rate": 4.040816326530612e-05,
"loss": 2.2724,
"step": 16700
},
{
"epoch": 24.82,
"learning_rate": 4.017268445839874e-05,
"loss": 2.2922,
"step": 16800
},
{
"epoch": 24.96,
"learning_rate": 3.993720565149136e-05,
"loss": 2.2934,
"step": 16900
},
{
"epoch": 25.11,
"learning_rate": 3.970172684458398e-05,
"loss": 2.2766,
"step": 17000
},
{
"epoch": 25.11,
"eval_loss": 1.0882744789123535,
"eval_runtime": 117.2941,
"eval_samples_per_second": 17.23,
"eval_steps_per_second": 2.157,
"eval_wer": 2.1761504205838693,
"step": 17000
},
{
"epoch": 25.26,
"learning_rate": 3.946624803767661e-05,
"loss": 2.2656,
"step": 17100
},
{
"epoch": 25.41,
"learning_rate": 3.9230769230769226e-05,
"loss": 2.2929,
"step": 17200
},
{
"epoch": 25.55,
"learning_rate": 3.899529042386185e-05,
"loss": 2.2513,
"step": 17300
},
{
"epoch": 25.7,
"learning_rate": 3.875981161695447e-05,
"loss": 2.2603,
"step": 17400
},
{
"epoch": 25.85,
"learning_rate": 3.852433281004709e-05,
"loss": 2.2534,
"step": 17500
},
{
"epoch": 25.85,
"eval_loss": 1.0743526220321655,
"eval_runtime": 118.2043,
"eval_samples_per_second": 17.098,
"eval_steps_per_second": 2.14,
"eval_wer": 2.1875309252845128,
"step": 17500
},
{
"epoch": 26.0,
"learning_rate": 3.8288854003139713e-05,
"loss": 2.2716,
"step": 17600
},
{
"epoch": 26.14,
"learning_rate": 3.8053375196232335e-05,
"loss": 2.2486,
"step": 17700
},
{
"epoch": 26.29,
"learning_rate": 3.781789638932496e-05,
"loss": 2.2068,
"step": 17800
},
{
"epoch": 26.44,
"learning_rate": 3.758241758241758e-05,
"loss": 2.2431,
"step": 17900
},
{
"epoch": 26.59,
"learning_rate": 3.73469387755102e-05,
"loss": 2.2393,
"step": 18000
},
{
"epoch": 26.59,
"eval_loss": 1.0561192035675049,
"eval_runtime": 116.8996,
"eval_samples_per_second": 17.288,
"eval_steps_per_second": 2.164,
"eval_wer": 2.1845620979713014,
"step": 18000
},
{
"epoch": 26.74,
"learning_rate": 3.711145996860282e-05,
"loss": 2.1944,
"step": 18100
},
{
"epoch": 26.88,
"learning_rate": 3.6875981161695444e-05,
"loss": 2.2359,
"step": 18200
},
{
"epoch": 27.03,
"learning_rate": 3.664285714285714e-05,
"loss": 2.2097,
"step": 18300
},
{
"epoch": 27.18,
"learning_rate": 3.640737833594976e-05,
"loss": 2.1431,
"step": 18400
},
{
"epoch": 27.33,
"learning_rate": 3.617189952904238e-05,
"loss": 2.2085,
"step": 18500
},
{
"epoch": 27.33,
"eval_loss": 1.0465816259384155,
"eval_runtime": 115.87,
"eval_samples_per_second": 17.442,
"eval_steps_per_second": 2.183,
"eval_wer": 2.1444829292429493,
"step": 18500
},
{
"epoch": 27.47,
"learning_rate": 3.5936420722135003e-05,
"loss": 2.2204,
"step": 18600
},
{
"epoch": 27.62,
"learning_rate": 3.5700941915227625e-05,
"loss": 2.242,
"step": 18700
},
{
"epoch": 27.77,
"learning_rate": 3.546546310832025e-05,
"loss": 2.1699,
"step": 18800
},
{
"epoch": 27.92,
"learning_rate": 3.522998430141287e-05,
"loss": 2.2152,
"step": 18900
},
{
"epoch": 28.06,
"learning_rate": 3.499450549450549e-05,
"loss": 2.1966,
"step": 19000
},
{
"epoch": 28.06,
"eval_loss": 1.0382250547409058,
"eval_runtime": 116.4655,
"eval_samples_per_second": 17.353,
"eval_steps_per_second": 2.172,
"eval_wer": 2.1088570014844135,
"step": 19000
},
{
"epoch": 28.21,
"learning_rate": 3.475902668759811e-05,
"loss": 2.169,
"step": 19100
},
{
"epoch": 28.36,
"learning_rate": 3.4523547880690734e-05,
"loss": 2.1981,
"step": 19200
},
{
"epoch": 28.51,
"learning_rate": 3.4288069073783356e-05,
"loss": 2.1692,
"step": 19300
},
{
"epoch": 28.66,
"learning_rate": 3.405259026687598e-05,
"loss": 2.1931,
"step": 19400
},
{
"epoch": 28.8,
"learning_rate": 3.38171114599686e-05,
"loss": 2.1794,
"step": 19500
},
{
"epoch": 28.8,
"eval_loss": 1.0263785123825073,
"eval_runtime": 114.5988,
"eval_samples_per_second": 17.635,
"eval_steps_per_second": 2.208,
"eval_wer": 1.9861454725383474,
"step": 19500
},
{
"epoch": 28.95,
"learning_rate": 3.358163265306122e-05,
"loss": 2.1638,
"step": 19600
},
{
"epoch": 29.1,
"learning_rate": 3.334615384615384e-05,
"loss": 2.1714,
"step": 19700
},
{
"epoch": 29.25,
"learning_rate": 3.3110675039246465e-05,
"loss": 2.1514,
"step": 19800
},
{
"epoch": 29.39,
"learning_rate": 3.2875196232339087e-05,
"loss": 2.1374,
"step": 19900
},
{
"epoch": 29.54,
"learning_rate": 3.263971742543171e-05,
"loss": 2.1423,
"step": 20000
},
{
"epoch": 29.54,
"eval_loss": 1.0245550870895386,
"eval_runtime": 116.8375,
"eval_samples_per_second": 17.298,
"eval_steps_per_second": 2.165,
"eval_wer": 1.9678377041068777,
"step": 20000
},
{
"epoch": 29.69,
"learning_rate": 3.240423861852433e-05,
"loss": 2.1807,
"step": 20100
},
{
"epoch": 29.84,
"learning_rate": 3.216875981161695e-05,
"loss": 2.1545,
"step": 20200
},
{
"epoch": 29.98,
"learning_rate": 3.1933281004709574e-05,
"loss": 2.1404,
"step": 20300
},
{
"epoch": 30.13,
"learning_rate": 3.1697802197802195e-05,
"loss": 2.1089,
"step": 20400
},
{
"epoch": 30.28,
"learning_rate": 3.146232339089482e-05,
"loss": 2.1649,
"step": 20500
},
{
"epoch": 30.28,
"eval_loss": 0.9981661438941956,
"eval_runtime": 116.056,
"eval_samples_per_second": 17.414,
"eval_steps_per_second": 2.18,
"eval_wer": 2.000494804552202,
"step": 20500
},
{
"epoch": 30.43,
"learning_rate": 3.122684458398744e-05,
"loss": 2.1425,
"step": 20600
},
{
"epoch": 30.58,
"learning_rate": 3.099136577708006e-05,
"loss": 2.1357,
"step": 20700
},
{
"epoch": 30.72,
"learning_rate": 3.0758241758241755e-05,
"loss": 2.1251,
"step": 20800
},
{
"epoch": 30.87,
"learning_rate": 3.052276295133438e-05,
"loss": 2.1256,
"step": 20900
},
{
"epoch": 31.02,
"learning_rate": 3.0287284144427e-05,
"loss": 2.143,
"step": 21000
},
{
"epoch": 31.02,
"eval_loss": 0.9985482692718506,
"eval_runtime": 116.0424,
"eval_samples_per_second": 17.416,
"eval_steps_per_second": 2.18,
"eval_wer": 2.045027214250371,
"step": 21000
},
{
"epoch": 31.17,
"learning_rate": 3.005180533751962e-05,
"loss": 2.0744,
"step": 21100
},
{
"epoch": 31.31,
"learning_rate": 2.9816326530612242e-05,
"loss": 2.0831,
"step": 21200
},
{
"epoch": 31.46,
"learning_rate": 2.9583202511773936e-05,
"loss": 2.1254,
"step": 21300
},
{
"epoch": 31.61,
"learning_rate": 2.934772370486656e-05,
"loss": 2.1357,
"step": 21400
},
{
"epoch": 31.76,
"learning_rate": 2.911224489795918e-05,
"loss": 2.1338,
"step": 21500
},
{
"epoch": 31.76,
"eval_loss": 0.9932034611701965,
"eval_runtime": 114.6961,
"eval_samples_per_second": 17.62,
"eval_steps_per_second": 2.206,
"eval_wer": 2.0024740227610094,
"step": 21500
},
{
"epoch": 31.91,
"learning_rate": 2.8876766091051805e-05,
"loss": 2.1053,
"step": 21600
},
{
"epoch": 32.05,
"learning_rate": 2.8641287284144426e-05,
"loss": 2.1111,
"step": 21700
},
{
"epoch": 32.2,
"learning_rate": 2.8405808477237045e-05,
"loss": 2.1028,
"step": 21800
},
{
"epoch": 32.35,
"learning_rate": 2.817032967032967e-05,
"loss": 2.0879,
"step": 21900
},
{
"epoch": 32.5,
"learning_rate": 2.793485086342229e-05,
"loss": 2.1076,
"step": 22000
},
{
"epoch": 32.5,
"eval_loss": 0.9902665019035339,
"eval_runtime": 120.6987,
"eval_samples_per_second": 16.744,
"eval_steps_per_second": 2.096,
"eval_wer": 2.0504700643245917,
"step": 22000
},
{
"epoch": 32.64,
"learning_rate": 2.769937205651491e-05,
"loss": 2.1107,
"step": 22100
},
{
"epoch": 32.79,
"learning_rate": 2.7463893249607535e-05,
"loss": 2.0953,
"step": 22200
},
{
"epoch": 32.94,
"learning_rate": 2.7228414442700154e-05,
"loss": 2.0619,
"step": 22300
},
{
"epoch": 33.09,
"learning_rate": 2.6992935635792776e-05,
"loss": 2.0531,
"step": 22400
},
{
"epoch": 33.23,
"learning_rate": 2.6757456828885397e-05,
"loss": 2.0519,
"step": 22500
},
{
"epoch": 33.23,
"eval_loss": 0.9833839535713196,
"eval_runtime": 116.5317,
"eval_samples_per_second": 17.343,
"eval_steps_per_second": 2.171,
"eval_wer": 2.07372587827808,
"step": 22500
},
{
"epoch": 33.38,
"learning_rate": 2.652197802197802e-05,
"loss": 2.0493,
"step": 22600
},
{
"epoch": 33.53,
"learning_rate": 2.6286499215070644e-05,
"loss": 2.0749,
"step": 22700
},
{
"epoch": 33.68,
"learning_rate": 2.6051020408163263e-05,
"loss": 2.0838,
"step": 22800
},
{
"epoch": 33.83,
"learning_rate": 2.5815541601255884e-05,
"loss": 2.0629,
"step": 22900
},
{
"epoch": 33.97,
"learning_rate": 2.5580062794348506e-05,
"loss": 2.0534,
"step": 23000
},
{
"epoch": 33.97,
"eval_loss": 0.9755652546882629,
"eval_runtime": 114.923,
"eval_samples_per_second": 17.586,
"eval_steps_per_second": 2.201,
"eval_wer": 2.024740227610094,
"step": 23000
},
{
"epoch": 34.12,
"learning_rate": 2.5344583987441128e-05,
"loss": 2.067,
"step": 23100
},
{
"epoch": 34.27,
"learning_rate": 2.5109105180533746e-05,
"loss": 2.0252,
"step": 23200
},
{
"epoch": 34.42,
"learning_rate": 2.487362637362637e-05,
"loss": 2.0483,
"step": 23300
},
{
"epoch": 34.56,
"learning_rate": 2.4638147566718993e-05,
"loss": 2.0464,
"step": 23400
},
{
"epoch": 34.71,
"learning_rate": 2.4402668759811615e-05,
"loss": 2.0121,
"step": 23500
},
{
"epoch": 34.71,
"eval_loss": 0.968792736530304,
"eval_runtime": 114.3088,
"eval_samples_per_second": 17.68,
"eval_steps_per_second": 2.213,
"eval_wer": 2.1439881246907473,
"step": 23500
},
{
"epoch": 34.86,
"learning_rate": 2.4167189952904237e-05,
"loss": 2.036,
"step": 23600
},
{
"epoch": 35.01,
"learning_rate": 2.3931711145996855e-05,
"loss": 2.013,
"step": 23700
},
{
"epoch": 35.16,
"learning_rate": 2.369623233908948e-05,
"loss": 2.0043,
"step": 23800
},
{
"epoch": 35.3,
"learning_rate": 2.3460753532182102e-05,
"loss": 2.037,
"step": 23900
},
{
"epoch": 35.45,
"learning_rate": 2.322527472527472e-05,
"loss": 2.0161,
"step": 24000
},
{
"epoch": 35.45,
"eval_loss": 0.9581586718559265,
"eval_runtime": 115.925,
"eval_samples_per_second": 17.434,
"eval_steps_per_second": 2.182,
"eval_wer": 2.1232063334982683,
"step": 24000
},
{
"epoch": 35.6,
"learning_rate": 2.2989795918367346e-05,
"loss": 2.0256,
"step": 24100
},
{
"epoch": 35.75,
"learning_rate": 2.2754317111459968e-05,
"loss": 2.0265,
"step": 24200
},
{
"epoch": 35.89,
"learning_rate": 2.251883830455259e-05,
"loss": 2.0298,
"step": 24300
},
{
"epoch": 36.04,
"learning_rate": 2.228335949764521e-05,
"loss": 2.0028,
"step": 24400
},
{
"epoch": 36.19,
"learning_rate": 2.204788069073783e-05,
"loss": 2.0178,
"step": 24500
},
{
"epoch": 36.19,
"eval_loss": 0.9480372071266174,
"eval_runtime": 116.8212,
"eval_samples_per_second": 17.3,
"eval_steps_per_second": 2.166,
"eval_wer": 2.0895596239485403,
"step": 24500
},
{
"epoch": 36.34,
"learning_rate": 2.1812401883830455e-05,
"loss": 2.008,
"step": 24600
},
{
"epoch": 36.48,
"learning_rate": 2.1576923076923076e-05,
"loss": 2.0132,
"step": 24700
},
{
"epoch": 36.63,
"learning_rate": 2.1341444270015695e-05,
"loss": 2.0204,
"step": 24800
},
{
"epoch": 36.78,
"learning_rate": 2.110596546310832e-05,
"loss": 1.9806,
"step": 24900
},
{
"epoch": 36.93,
"learning_rate": 2.087048665620094e-05,
"loss": 2.0154,
"step": 25000
},
{
"epoch": 36.93,
"eval_loss": 0.9483017325401306,
"eval_runtime": 117.4294,
"eval_samples_per_second": 17.21,
"eval_steps_per_second": 2.154,
"eval_wer": 2.078673923800099,
"step": 25000
},
{
"epoch": 37.08,
"learning_rate": 2.063500784929356e-05,
"loss": 1.997,
"step": 25100
},
{
"epoch": 37.22,
"learning_rate": 2.0399529042386185e-05,
"loss": 1.9712,
"step": 25200
},
{
"epoch": 37.37,
"learning_rate": 2.0164050235478804e-05,
"loss": 2.0131,
"step": 25300
},
{
"epoch": 37.52,
"learning_rate": 1.992857142857143e-05,
"loss": 1.9605,
"step": 25400
},
{
"epoch": 37.67,
"learning_rate": 1.9695447409733123e-05,
"loss": 1.9966,
"step": 25500
},
{
"epoch": 37.67,
"eval_loss": 0.940608024597168,
"eval_runtime": 115.2635,
"eval_samples_per_second": 17.534,
"eval_steps_per_second": 2.195,
"eval_wer": 2.0296882731321126,
"step": 25500
},
{
"epoch": 37.81,
"learning_rate": 1.945996860282574e-05,
"loss": 1.9879,
"step": 25600
},
{
"epoch": 37.96,
"learning_rate": 1.9224489795918367e-05,
"loss": 1.9836,
"step": 25700
},
{
"epoch": 38.11,
"learning_rate": 1.8989010989010988e-05,
"loss": 1.9872,
"step": 25800
},
{
"epoch": 38.26,
"learning_rate": 1.8753532182103607e-05,
"loss": 1.9684,
"step": 25900
},
{
"epoch": 38.4,
"learning_rate": 1.851805337519623e-05,
"loss": 1.9753,
"step": 26000
},
{
"epoch": 38.4,
"eval_loss": 0.9418594837188721,
"eval_runtime": 115.7124,
"eval_samples_per_second": 17.466,
"eval_steps_per_second": 2.186,
"eval_wer": 2.0346363186541314,
"step": 26000
},
{
"epoch": 38.55,
"learning_rate": 1.828257456828885e-05,
"loss": 1.9926,
"step": 26100
},
{
"epoch": 38.7,
"learning_rate": 1.8047095761381475e-05,
"loss": 1.9685,
"step": 26200
},
{
"epoch": 38.85,
"learning_rate": 1.7811616954474097e-05,
"loss": 1.9707,
"step": 26300
},
{
"epoch": 39.0,
"learning_rate": 1.7576138147566716e-05,
"loss": 1.9477,
"step": 26400
},
{
"epoch": 39.14,
"learning_rate": 1.7340659340659337e-05,
"loss": 1.9524,
"step": 26500
},
{
"epoch": 39.14,
"eval_loss": 0.927354097366333,
"eval_runtime": 115.8614,
"eval_samples_per_second": 17.443,
"eval_steps_per_second": 2.184,
"eval_wer": 2.0697674418604652,
"step": 26500
},
{
"epoch": 39.29,
"learning_rate": 1.7105180533751963e-05,
"loss": 1.9673,
"step": 26600
},
{
"epoch": 39.44,
"learning_rate": 1.6869701726844584e-05,
"loss": 1.9802,
"step": 26700
},
{
"epoch": 39.59,
"learning_rate": 1.6634222919937203e-05,
"loss": 1.9408,
"step": 26800
},
{
"epoch": 39.73,
"learning_rate": 1.6398744113029824e-05,
"loss": 1.9482,
"step": 26900
},
{
"epoch": 39.88,
"learning_rate": 1.6163265306122446e-05,
"loss": 1.9427,
"step": 27000
},
{
"epoch": 39.88,
"eval_loss": 0.9232719540596008,
"eval_runtime": 116.3191,
"eval_samples_per_second": 17.375,
"eval_steps_per_second": 2.175,
"eval_wer": 2.078673923800099,
"step": 27000
},
{
"epoch": 40.03,
"learning_rate": 1.592778649921507e-05,
"loss": 1.9653,
"step": 27100
},
{
"epoch": 40.18,
"learning_rate": 1.569230769230769e-05,
"loss": 1.9157,
"step": 27200
},
{
"epoch": 40.32,
"learning_rate": 1.545682888540031e-05,
"loss": 1.9493,
"step": 27300
},
{
"epoch": 40.47,
"learning_rate": 1.5221350078492935e-05,
"loss": 1.8974,
"step": 27400
},
{
"epoch": 40.62,
"learning_rate": 1.4985871271585557e-05,
"loss": 1.9258,
"step": 27500
},
{
"epoch": 40.62,
"eval_loss": 0.9182448983192444,
"eval_runtime": 115.4065,
"eval_samples_per_second": 17.512,
"eval_steps_per_second": 2.192,
"eval_wer": 2.052944087085601,
"step": 27500
},
{
"epoch": 40.77,
"learning_rate": 1.4750392464678177e-05,
"loss": 1.9354,
"step": 27600
},
{
"epoch": 40.92,
"learning_rate": 1.4514913657770799e-05,
"loss": 1.952,
"step": 27700
},
{
"epoch": 41.06,
"learning_rate": 1.4281789638932496e-05,
"loss": 1.9231,
"step": 27800
},
{
"epoch": 41.21,
"learning_rate": 1.4046310832025116e-05,
"loss": 1.9465,
"step": 27900
},
{
"epoch": 41.36,
"learning_rate": 1.3810832025117738e-05,
"loss": 1.9031,
"step": 28000
},
{
"epoch": 41.36,
"eval_loss": 0.9149593114852905,
"eval_runtime": 116.2555,
"eval_samples_per_second": 17.384,
"eval_steps_per_second": 2.176,
"eval_wer": 2.078673923800099,
"step": 28000
},
{
"epoch": 41.51,
"learning_rate": 1.357535321821036e-05,
"loss": 1.9361,
"step": 28100
},
{
"epoch": 41.65,
"learning_rate": 1.3342229199372054e-05,
"loss": 1.916,
"step": 28200
},
{
"epoch": 41.8,
"learning_rate": 1.3106750392464677e-05,
"loss": 1.9149,
"step": 28300
},
{
"epoch": 41.95,
"learning_rate": 1.2871271585557299e-05,
"loss": 1.9037,
"step": 28400
},
{
"epoch": 42.1,
"learning_rate": 1.263579277864992e-05,
"loss": 1.9297,
"step": 28500
},
{
"epoch": 42.1,
"eval_loss": 0.9040070176124573,
"eval_runtime": 113.8901,
"eval_samples_per_second": 17.745,
"eval_steps_per_second": 2.221,
"eval_wer": 2.0504700643245917,
"step": 28500
},
{
"epoch": 42.25,
"learning_rate": 1.2400313971742541e-05,
"loss": 1.8855,
"step": 28600
},
{
"epoch": 42.39,
"learning_rate": 1.2164835164835163e-05,
"loss": 1.9095,
"step": 28700
},
{
"epoch": 42.54,
"learning_rate": 1.1929356357927786e-05,
"loss": 1.8913,
"step": 28800
},
{
"epoch": 42.69,
"learning_rate": 1.1693877551020408e-05,
"loss": 1.8685,
"step": 28900
},
{
"epoch": 42.84,
"learning_rate": 1.1458398744113028e-05,
"loss": 1.9041,
"step": 29000
},
{
"epoch": 42.84,
"eval_loss": 0.9008907675743103,
"eval_runtime": 114.9643,
"eval_samples_per_second": 17.579,
"eval_steps_per_second": 2.201,
"eval_wer": 2.05789213260762,
"step": 29000
},
{
"epoch": 42.98,
"learning_rate": 1.122291993720565e-05,
"loss": 1.8963,
"step": 29100
},
{
"epoch": 43.13,
"learning_rate": 1.0987441130298273e-05,
"loss": 1.9068,
"step": 29200
},
{
"epoch": 43.28,
"learning_rate": 1.0751962323390895e-05,
"loss": 1.9003,
"step": 29300
},
{
"epoch": 43.43,
"learning_rate": 1.0516483516483515e-05,
"loss": 1.891,
"step": 29400
},
{
"epoch": 43.57,
"learning_rate": 1.0281004709576137e-05,
"loss": 1.8929,
"step": 29500
},
{
"epoch": 43.57,
"eval_loss": 0.8968304991722107,
"eval_runtime": 116.4378,
"eval_samples_per_second": 17.357,
"eval_steps_per_second": 2.173,
"eval_wer": 2.032657100445324,
"step": 29500
},
{
"epoch": 43.72,
"learning_rate": 1.0045525902668759e-05,
"loss": 1.8827,
"step": 29600
},
{
"epoch": 43.87,
"learning_rate": 9.810047095761382e-06,
"loss": 1.8862,
"step": 29700
},
{
"epoch": 44.02,
"learning_rate": 9.574568288854002e-06,
"loss": 1.8787,
"step": 29800
},
{
"epoch": 44.17,
"learning_rate": 9.339089481946624e-06,
"loss": 1.8501,
"step": 29900
},
{
"epoch": 44.31,
"learning_rate": 9.103610675039246e-06,
"loss": 1.9077,
"step": 30000
},
{
"epoch": 44.31,
"eval_loss": 0.8953686952590942,
"eval_runtime": 115.4838,
"eval_samples_per_second": 17.5,
"eval_steps_per_second": 2.191,
"eval_wer": 2.061850569025235,
"step": 30000
},
{
"epoch": 44.46,
"learning_rate": 8.868131868131868e-06,
"loss": 1.8804,
"step": 30100
},
{
"epoch": 44.61,
"learning_rate": 8.63265306122449e-06,
"loss": 1.8723,
"step": 30200
},
{
"epoch": 44.76,
"learning_rate": 8.397174254317111e-06,
"loss": 1.8577,
"step": 30300
},
{
"epoch": 44.9,
"learning_rate": 8.161695447409733e-06,
"loss": 1.8811,
"step": 30400
},
{
"epoch": 45.05,
"learning_rate": 7.928571428571429e-06,
"loss": 1.8504,
"step": 30500
},
{
"epoch": 45.05,
"eval_loss": 0.892192542552948,
"eval_runtime": 116.2513,
"eval_samples_per_second": 17.385,
"eval_steps_per_second": 2.176,
"eval_wer": 2.07372587827808,
"step": 30500
},
{
"epoch": 45.2,
"learning_rate": 7.693092621664049e-06,
"loss": 1.861,
"step": 30600
},
{
"epoch": 45.35,
"learning_rate": 7.457613814756671e-06,
"loss": 1.8496,
"step": 30700
},
{
"epoch": 45.49,
"learning_rate": 7.222135007849293e-06,
"loss": 1.8612,
"step": 30800
},
{
"epoch": 45.64,
"learning_rate": 6.986656200941915e-06,
"loss": 1.865,
"step": 30900
},
{
"epoch": 45.79,
"learning_rate": 6.751177394034536e-06,
"loss": 1.8732,
"step": 31000
},
{
"epoch": 45.79,
"eval_loss": 0.8897548317909241,
"eval_runtime": 116.5927,
"eval_samples_per_second": 17.334,
"eval_steps_per_second": 2.17,
"eval_wer": 2.0682830282038593,
"step": 31000
},
{
"epoch": 45.94,
"learning_rate": 6.5156985871271585e-06,
"loss": 1.8374,
"step": 31100
},
{
"epoch": 46.09,
"learning_rate": 6.280219780219779e-06,
"loss": 1.8395,
"step": 31200
},
{
"epoch": 46.23,
"learning_rate": 6.044740973312402e-06,
"loss": 1.8377,
"step": 31300
},
{
"epoch": 46.38,
"learning_rate": 5.809262166405023e-06,
"loss": 1.87,
"step": 31400
},
{
"epoch": 46.53,
"learning_rate": 5.573783359497644e-06,
"loss": 1.877,
"step": 31500
},
{
"epoch": 46.53,
"eval_loss": 0.8848925828933716,
"eval_runtime": 116.1465,
"eval_samples_per_second": 17.4,
"eval_steps_per_second": 2.178,
"eval_wer": 2.0588817417120238,
"step": 31500
},
{
"epoch": 46.68,
"learning_rate": 5.3383045525902665e-06,
"loss": 1.8256,
"step": 31600
},
{
"epoch": 46.82,
"learning_rate": 5.1028257456828875e-06,
"loss": 1.8317,
"step": 31700
},
{
"epoch": 46.97,
"learning_rate": 4.86734693877551e-06,
"loss": 1.8579,
"step": 31800
},
{
"epoch": 47.12,
"learning_rate": 4.631868131868132e-06,
"loss": 1.839,
"step": 31900
},
{
"epoch": 47.27,
"learning_rate": 4.396389324960754e-06,
"loss": 1.8587,
"step": 32000
},
{
"epoch": 47.27,
"eval_loss": 0.8843359351158142,
"eval_runtime": 116.5866,
"eval_samples_per_second": 17.335,
"eval_steps_per_second": 2.17,
"eval_wer": 2.045027214250371,
"step": 32000
},
{
"epoch": 47.41,
"learning_rate": 4.160910518053375e-06,
"loss": 1.8419,
"step": 32100
},
{
"epoch": 47.56,
"learning_rate": 3.925431711145996e-06,
"loss": 1.8639,
"step": 32200
},
{
"epoch": 47.71,
"learning_rate": 3.6899529042386186e-06,
"loss": 1.8395,
"step": 32300
},
{
"epoch": 47.86,
"learning_rate": 3.45447409733124e-06,
"loss": 1.8369,
"step": 32400
},
{
"epoch": 48.01,
"learning_rate": 3.2189952904238617e-06,
"loss": 1.8236,
"step": 32500
},
{
"epoch": 48.01,
"eval_loss": 0.8810222148895264,
"eval_runtime": 115.817,
"eval_samples_per_second": 17.45,
"eval_steps_per_second": 2.184,
"eval_wer": 2.0554181098466104,
"step": 32500
},
{
"epoch": 48.15,
"learning_rate": 2.9835164835164835e-06,
"loss": 1.8468,
"step": 32600
},
{
"epoch": 48.3,
"learning_rate": 2.7503924646781788e-06,
"loss": 1.8326,
"step": 32700
},
{
"epoch": 48.45,
"learning_rate": 2.5149136577708006e-06,
"loss": 1.8279,
"step": 32800
},
{
"epoch": 48.6,
"learning_rate": 2.2794348508634223e-06,
"loss": 1.8324,
"step": 32900
},
{
"epoch": 48.74,
"learning_rate": 2.043956043956044e-06,
"loss": 1.8392,
"step": 33000
},
{
"epoch": 48.74,
"eval_loss": 0.8820456266403198,
"eval_runtime": 115.6891,
"eval_samples_per_second": 17.469,
"eval_steps_per_second": 2.187,
"eval_wer": 2.0573973280554183,
"step": 33000
},
{
"epoch": 48.89,
"learning_rate": 1.8084772370486653e-06,
"loss": 1.8363,
"step": 33100
},
{
"epoch": 49.04,
"learning_rate": 1.572998430141287e-06,
"loss": 1.7996,
"step": 33200
},
{
"epoch": 49.19,
"learning_rate": 1.3375196232339088e-06,
"loss": 1.8113,
"step": 33300
},
{
"epoch": 49.34,
"learning_rate": 1.1020408163265304e-06,
"loss": 1.8428,
"step": 33400
},
{
"epoch": 49.48,
"learning_rate": 8.665620094191522e-07,
"loss": 1.8428,
"step": 33500
},
{
"epoch": 49.48,
"eval_loss": 0.8815611600875854,
"eval_runtime": 117.0058,
"eval_samples_per_second": 17.273,
"eval_steps_per_second": 2.162,
"eval_wer": 2.066798614547254,
"step": 33500
}
],
"max_steps": 33850,
"num_train_epochs": 50,
"total_flos": 1.524607779428202e+20,
"trial_name": null,
"trial_params": null
}