xls-r-300m-sv-cv8 / trainer_state.json
hf-test's picture
End of training
2b65b1d
raw
history blame
31.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.998632010943915,
"global_step": 18250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.27,
"learning_rate": 3.7125e-06,
"loss": 12.2134,
"step": 100
},
{
"epoch": 0.55,
"learning_rate": 7.4625e-06,
"loss": 6.115,
"step": 200
},
{
"epoch": 0.82,
"learning_rate": 1.1212499999999998e-05,
"loss": 4.1403,
"step": 300
},
{
"epoch": 1.1,
"learning_rate": 1.49625e-05,
"loss": 3.6727,
"step": 400
},
{
"epoch": 1.37,
"learning_rate": 1.8712499999999997e-05,
"loss": 3.3224,
"step": 500
},
{
"epoch": 1.37,
"eval_loss": 3.267564058303833,
"eval_runtime": 144.3422,
"eval_samples_per_second": 33.552,
"eval_steps_per_second": 4.198,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 1.64,
"learning_rate": 2.2462499999999997e-05,
"loss": 3.1389,
"step": 600
},
{
"epoch": 1.92,
"learning_rate": 2.6212499999999997e-05,
"loss": 3.069,
"step": 700
},
{
"epoch": 2.19,
"learning_rate": 2.99625e-05,
"loss": 3.0412,
"step": 800
},
{
"epoch": 2.47,
"learning_rate": 3.37125e-05,
"loss": 2.9725,
"step": 900
},
{
"epoch": 2.74,
"learning_rate": 3.7462499999999996e-05,
"loss": 2.9319,
"step": 1000
},
{
"epoch": 2.74,
"eval_loss": 2.928657054901123,
"eval_runtime": 144.855,
"eval_samples_per_second": 33.433,
"eval_steps_per_second": 4.183,
"eval_wer": 1.0000283848992335,
"step": 1000
},
{
"epoch": 3.01,
"learning_rate": 4.12125e-05,
"loss": 2.9171,
"step": 1100
},
{
"epoch": 3.29,
"learning_rate": 4.4962499999999995e-05,
"loss": 2.8588,
"step": 1200
},
{
"epoch": 3.56,
"learning_rate": 4.871249999999999e-05,
"loss": 2.7606,
"step": 1300
},
{
"epoch": 3.83,
"learning_rate": 5.2462499999999994e-05,
"loss": 2.4333,
"step": 1400
},
{
"epoch": 4.11,
"learning_rate": 5.62125e-05,
"loss": 2.1173,
"step": 1500
},
{
"epoch": 4.11,
"eval_loss": 1.147773265838623,
"eval_runtime": 146.3922,
"eval_samples_per_second": 33.082,
"eval_steps_per_second": 4.14,
"eval_wer": 0.8787680953732614,
"step": 1500
},
{
"epoch": 4.38,
"learning_rate": 5.9962499999999994e-05,
"loss": 1.9512,
"step": 1600
},
{
"epoch": 4.66,
"learning_rate": 6.37125e-05,
"loss": 1.8501,
"step": 1700
},
{
"epoch": 4.93,
"learning_rate": 6.746249999999999e-05,
"loss": 1.7846,
"step": 1800
},
{
"epoch": 5.21,
"learning_rate": 7.121249999999999e-05,
"loss": 1.7365,
"step": 1900
},
{
"epoch": 5.48,
"learning_rate": 7.49625e-05,
"loss": 1.6973,
"step": 2000
},
{
"epoch": 5.48,
"eval_loss": 0.6748705506324768,
"eval_runtime": 146.3732,
"eval_samples_per_second": 33.087,
"eval_steps_per_second": 4.14,
"eval_wer": 0.6546693159239285,
"step": 2000
},
{
"epoch": 5.75,
"learning_rate": 7.45476923076923e-05,
"loss": 1.6824,
"step": 2100
},
{
"epoch": 6.03,
"learning_rate": 7.408615384615384e-05,
"loss": 1.6642,
"step": 2200
},
{
"epoch": 6.3,
"learning_rate": 7.362461538461538e-05,
"loss": 1.6098,
"step": 2300
},
{
"epoch": 6.57,
"learning_rate": 7.316769230769229e-05,
"loss": 1.6063,
"step": 2400
},
{
"epoch": 6.85,
"learning_rate": 7.270615384615384e-05,
"loss": 1.5865,
"step": 2500
},
{
"epoch": 6.85,
"eval_loss": 0.5499770641326904,
"eval_runtime": 146.214,
"eval_samples_per_second": 33.123,
"eval_steps_per_second": 4.145,
"eval_wer": 0.5633550950894124,
"step": 2500
},
{
"epoch": 7.12,
"learning_rate": 7.224461538461538e-05,
"loss": 1.5758,
"step": 2600
},
{
"epoch": 7.4,
"learning_rate": 7.178307692307692e-05,
"loss": 1.5329,
"step": 2700
},
{
"epoch": 7.67,
"learning_rate": 7.132153846153845e-05,
"loss": 1.5395,
"step": 2800
},
{
"epoch": 7.94,
"learning_rate": 7.086e-05,
"loss": 1.5163,
"step": 2900
},
{
"epoch": 8.22,
"learning_rate": 7.039846153846153e-05,
"loss": 1.5094,
"step": 3000
},
{
"epoch": 8.22,
"eval_loss": 0.4839838743209839,
"eval_runtime": 146.4688,
"eval_samples_per_second": 33.065,
"eval_steps_per_second": 4.137,
"eval_wer": 0.542974737439682,
"step": 3000
},
{
"epoch": 8.49,
"learning_rate": 6.993692307692307e-05,
"loss": 1.4862,
"step": 3100
},
{
"epoch": 8.77,
"learning_rate": 6.94753846153846e-05,
"loss": 1.4738,
"step": 3200
},
{
"epoch": 9.04,
"learning_rate": 6.901384615384615e-05,
"loss": 1.4667,
"step": 3300
},
{
"epoch": 9.31,
"learning_rate": 6.855230769230769e-05,
"loss": 1.4564,
"step": 3400
},
{
"epoch": 9.59,
"learning_rate": 6.809076923076922e-05,
"loss": 1.4644,
"step": 3500
},
{
"epoch": 9.59,
"eval_loss": 0.48440465331077576,
"eval_runtime": 146.4275,
"eval_samples_per_second": 33.074,
"eval_steps_per_second": 4.139,
"eval_wer": 0.4142492194152711,
"step": 3500
},
{
"epoch": 9.86,
"learning_rate": 6.762923076923076e-05,
"loss": 1.4389,
"step": 3600
},
{
"epoch": 10.14,
"learning_rate": 6.716769230769231e-05,
"loss": 1.437,
"step": 3700
},
{
"epoch": 10.41,
"learning_rate": 6.670615384615384e-05,
"loss": 1.4278,
"step": 3800
},
{
"epoch": 10.68,
"learning_rate": 6.624461538461537e-05,
"loss": 1.4063,
"step": 3900
},
{
"epoch": 10.96,
"learning_rate": 6.578307692307691e-05,
"loss": 1.4061,
"step": 4000
},
{
"epoch": 10.96,
"eval_loss": 0.4355711340904236,
"eval_runtime": 146.0305,
"eval_samples_per_second": 33.164,
"eval_steps_per_second": 4.15,
"eval_wer": 0.3808401930173148,
"step": 4000
},
{
"epoch": 11.23,
"learning_rate": 6.532153846153845e-05,
"loss": 1.3829,
"step": 4100
},
{
"epoch": 11.51,
"learning_rate": 6.486e-05,
"loss": 1.3993,
"step": 4200
},
{
"epoch": 11.78,
"learning_rate": 6.439846153846153e-05,
"loss": 1.3875,
"step": 4300
},
{
"epoch": 12.05,
"learning_rate": 6.393692307692306e-05,
"loss": 1.4017,
"step": 4400
},
{
"epoch": 12.33,
"learning_rate": 6.347538461538461e-05,
"loss": 1.3584,
"step": 4500
},
{
"epoch": 12.33,
"eval_loss": 0.41921573877334595,
"eval_runtime": 145.7061,
"eval_samples_per_second": 33.238,
"eval_steps_per_second": 4.159,
"eval_wer": 0.3697700823162078,
"step": 4500
},
{
"epoch": 12.6,
"learning_rate": 6.301846153846153e-05,
"loss": 1.3732,
"step": 4600
},
{
"epoch": 12.88,
"learning_rate": 6.255692307692307e-05,
"loss": 1.3719,
"step": 4700
},
{
"epoch": 13.15,
"learning_rate": 6.20953846153846e-05,
"loss": 1.3742,
"step": 4800
},
{
"epoch": 13.42,
"learning_rate": 6.163384615384614e-05,
"loss": 1.3416,
"step": 4900
},
{
"epoch": 13.7,
"learning_rate": 6.117230769230768e-05,
"loss": 1.3438,
"step": 5000
},
{
"epoch": 13.7,
"eval_loss": 0.39804813265800476,
"eval_runtime": 146.4763,
"eval_samples_per_second": 33.063,
"eval_steps_per_second": 4.137,
"eval_wer": 0.3583877377235311,
"step": 5000
},
{
"epoch": 13.97,
"learning_rate": 6.0710769230769224e-05,
"loss": 1.3397,
"step": 5100
},
{
"epoch": 14.25,
"learning_rate": 6.024923076923076e-05,
"loss": 1.3228,
"step": 5200
},
{
"epoch": 14.52,
"learning_rate": 5.97876923076923e-05,
"loss": 1.3181,
"step": 5300
},
{
"epoch": 14.79,
"learning_rate": 5.9326153846153835e-05,
"loss": 1.3505,
"step": 5400
},
{
"epoch": 15.07,
"learning_rate": 5.886461538461538e-05,
"loss": 1.3332,
"step": 5500
},
{
"epoch": 15.07,
"eval_loss": 0.3896285891532898,
"eval_runtime": 145.3992,
"eval_samples_per_second": 33.308,
"eval_steps_per_second": 4.168,
"eval_wer": 0.3571671870564859,
"step": 5500
},
{
"epoch": 15.34,
"learning_rate": 5.840307692307692e-05,
"loss": 1.3081,
"step": 5600
},
{
"epoch": 15.62,
"learning_rate": 5.794153846153845e-05,
"loss": 1.3305,
"step": 5700
},
{
"epoch": 15.89,
"learning_rate": 5.747999999999999e-05,
"loss": 1.3206,
"step": 5800
},
{
"epoch": 16.16,
"learning_rate": 5.702307692307692e-05,
"loss": 1.3205,
"step": 5900
},
{
"epoch": 16.44,
"learning_rate": 5.6561538461538456e-05,
"loss": 1.3025,
"step": 6000
},
{
"epoch": 16.44,
"eval_loss": 0.3834877014160156,
"eval_runtime": 145.719,
"eval_samples_per_second": 33.235,
"eval_steps_per_second": 4.159,
"eval_wer": 0.34870848708487084,
"step": 6000
},
{
"epoch": 16.71,
"learning_rate": 5.6099999999999995e-05,
"loss": 1.3008,
"step": 6100
},
{
"epoch": 16.98,
"learning_rate": 5.5638461538461534e-05,
"loss": 1.2926,
"step": 6200
},
{
"epoch": 17.26,
"learning_rate": 5.5176923076923067e-05,
"loss": 1.2871,
"step": 6300
},
{
"epoch": 17.53,
"learning_rate": 5.471538461538461e-05,
"loss": 1.2688,
"step": 6400
},
{
"epoch": 17.81,
"learning_rate": 5.425384615384615e-05,
"loss": 1.2979,
"step": 6500
},
{
"epoch": 17.81,
"eval_loss": 0.37809377908706665,
"eval_runtime": 145.3953,
"eval_samples_per_second": 33.309,
"eval_steps_per_second": 4.168,
"eval_wer": 0.3416974169741697,
"step": 6500
},
{
"epoch": 18.08,
"learning_rate": 5.3792307692307684e-05,
"loss": 1.2865,
"step": 6600
},
{
"epoch": 18.36,
"learning_rate": 5.3330769230769223e-05,
"loss": 1.2795,
"step": 6700
},
{
"epoch": 18.63,
"learning_rate": 5.286923076923076e-05,
"loss": 1.271,
"step": 6800
},
{
"epoch": 18.9,
"learning_rate": 5.240769230769231e-05,
"loss": 1.2595,
"step": 6900
},
{
"epoch": 19.18,
"learning_rate": 5.194615384615384e-05,
"loss": 1.2736,
"step": 7000
},
{
"epoch": 19.18,
"eval_loss": 0.37344905734062195,
"eval_runtime": 145.9621,
"eval_samples_per_second": 33.18,
"eval_steps_per_second": 4.152,
"eval_wer": 0.32699403917116093,
"step": 7000
},
{
"epoch": 19.45,
"learning_rate": 5.148461538461538e-05,
"loss": 1.2619,
"step": 7100
},
{
"epoch": 19.73,
"learning_rate": 5.102307692307692e-05,
"loss": 1.2351,
"step": 7200
},
{
"epoch": 20.0,
"learning_rate": 5.056153846153845e-05,
"loss": 1.277,
"step": 7300
},
{
"epoch": 20.27,
"learning_rate": 5.01e-05,
"loss": 1.2559,
"step": 7400
},
{
"epoch": 20.55,
"learning_rate": 4.963846153846154e-05,
"loss": 1.2415,
"step": 7500
},
{
"epoch": 20.55,
"eval_loss": 0.3636943995952606,
"eval_runtime": 146.1853,
"eval_samples_per_second": 33.129,
"eval_steps_per_second": 4.145,
"eval_wer": 0.3316491626454726,
"step": 7500
},
{
"epoch": 20.82,
"learning_rate": 4.917692307692307e-05,
"loss": 1.2383,
"step": 7600
},
{
"epoch": 21.1,
"learning_rate": 4.871538461538461e-05,
"loss": 1.2373,
"step": 7700
},
{
"epoch": 21.37,
"learning_rate": 4.825384615384615e-05,
"loss": 1.2197,
"step": 7800
},
{
"epoch": 21.64,
"learning_rate": 4.7792307692307694e-05,
"loss": 1.2291,
"step": 7900
},
{
"epoch": 21.92,
"learning_rate": 4.733076923076923e-05,
"loss": 1.2255,
"step": 8000
},
{
"epoch": 21.92,
"eval_loss": 0.35461699962615967,
"eval_runtime": 145.4805,
"eval_samples_per_second": 33.29,
"eval_steps_per_second": 4.166,
"eval_wer": 0.3146749929037752,
"step": 8000
},
{
"epoch": 22.19,
"learning_rate": 4.6869230769230766e-05,
"loss": 1.2578,
"step": 8100
},
{
"epoch": 22.47,
"learning_rate": 4.64076923076923e-05,
"loss": 1.2145,
"step": 8200
},
{
"epoch": 22.74,
"learning_rate": 4.594615384615384e-05,
"loss": 1.2263,
"step": 8300
},
{
"epoch": 23.01,
"learning_rate": 4.5484615384615384e-05,
"loss": 1.2557,
"step": 8400
},
{
"epoch": 23.29,
"learning_rate": 4.502307692307692e-05,
"loss": 1.2193,
"step": 8500
},
{
"epoch": 23.29,
"eval_loss": 0.35236233472824097,
"eval_runtime": 145.9059,
"eval_samples_per_second": 33.193,
"eval_steps_per_second": 4.153,
"eval_wer": 0.31964235026965654,
"step": 8500
},
{
"epoch": 23.56,
"learning_rate": 4.456615384615384e-05,
"loss": 1.2092,
"step": 8600
},
{
"epoch": 23.83,
"learning_rate": 4.410461538461538e-05,
"loss": 1.222,
"step": 8700
},
{
"epoch": 24.11,
"learning_rate": 4.364307692307692e-05,
"loss": 1.2004,
"step": 8800
},
{
"epoch": 24.38,
"learning_rate": 4.318153846153846e-05,
"loss": 1.2121,
"step": 8900
},
{
"epoch": 24.66,
"learning_rate": 4.272e-05,
"loss": 1.2104,
"step": 9000
},
{
"epoch": 24.66,
"eval_loss": 0.3402980864048004,
"eval_runtime": 146.1669,
"eval_samples_per_second": 33.133,
"eval_steps_per_second": 4.146,
"eval_wer": 0.30967925063866025,
"step": 9000
},
{
"epoch": 24.93,
"learning_rate": 4.225846153846153e-05,
"loss": 1.2001,
"step": 9100
},
{
"epoch": 25.21,
"learning_rate": 4.179692307692307e-05,
"loss": 1.2004,
"step": 9200
},
{
"epoch": 25.48,
"learning_rate": 4.1335384615384615e-05,
"loss": 1.1803,
"step": 9300
},
{
"epoch": 25.75,
"learning_rate": 4.0873846153846154e-05,
"loss": 1.1934,
"step": 9400
},
{
"epoch": 26.03,
"learning_rate": 4.041230769230769e-05,
"loss": 1.1965,
"step": 9500
},
{
"epoch": 26.03,
"eval_loss": 0.35078364610671997,
"eval_runtime": 145.889,
"eval_samples_per_second": 33.196,
"eval_steps_per_second": 4.154,
"eval_wer": 0.30931024694862336,
"step": 9500
},
{
"epoch": 26.3,
"learning_rate": 3.9950769230769226e-05,
"loss": 1.1909,
"step": 9600
},
{
"epoch": 26.57,
"learning_rate": 3.948923076923076e-05,
"loss": 1.1886,
"step": 9700
},
{
"epoch": 26.85,
"learning_rate": 3.9027692307692305e-05,
"loss": 1.1785,
"step": 9800
},
{
"epoch": 27.12,
"learning_rate": 3.8566153846153844e-05,
"loss": 1.1675,
"step": 9900
},
{
"epoch": 27.4,
"learning_rate": 3.810461538461538e-05,
"loss": 1.1976,
"step": 10000
},
{
"epoch": 27.4,
"eval_loss": 0.34191542863845825,
"eval_runtime": 145.8469,
"eval_samples_per_second": 33.206,
"eval_steps_per_second": 4.155,
"eval_wer": 0.30712460970763555,
"step": 10000
},
{
"epoch": 27.67,
"learning_rate": 3.7643076923076916e-05,
"loss": 1.1729,
"step": 10100
},
{
"epoch": 27.94,
"learning_rate": 3.718153846153846e-05,
"loss": 1.1852,
"step": 10200
},
{
"epoch": 28.22,
"learning_rate": 3.6719999999999994e-05,
"loss": 1.1881,
"step": 10300
},
{
"epoch": 28.49,
"learning_rate": 3.625846153846153e-05,
"loss": 1.1632,
"step": 10400
},
{
"epoch": 28.77,
"learning_rate": 3.580153846153846e-05,
"loss": 1.182,
"step": 10500
},
{
"epoch": 28.77,
"eval_loss": 0.3363555371761322,
"eval_runtime": 146.1933,
"eval_samples_per_second": 33.127,
"eval_steps_per_second": 4.145,
"eval_wer": 0.2963383479988646,
"step": 10500
},
{
"epoch": 29.04,
"learning_rate": 3.534e-05,
"loss": 1.1535,
"step": 10600
},
{
"epoch": 29.31,
"learning_rate": 3.4878461538461536e-05,
"loss": 1.1636,
"step": 10700
},
{
"epoch": 29.59,
"learning_rate": 3.4416923076923076e-05,
"loss": 1.1496,
"step": 10800
},
{
"epoch": 29.86,
"learning_rate": 3.3955384615384615e-05,
"loss": 1.1507,
"step": 10900
},
{
"epoch": 30.14,
"learning_rate": 3.349384615384615e-05,
"loss": 1.158,
"step": 11000
},
{
"epoch": 30.14,
"eval_loss": 0.333802729845047,
"eval_runtime": 145.3999,
"eval_samples_per_second": 33.308,
"eval_steps_per_second": 4.168,
"eval_wer": 0.2931876241839341,
"step": 11000
},
{
"epoch": 30.41,
"learning_rate": 3.303230769230769e-05,
"loss": 1.1527,
"step": 11100
},
{
"epoch": 30.68,
"learning_rate": 3.2570769230769226e-05,
"loss": 1.1484,
"step": 11200
},
{
"epoch": 30.96,
"learning_rate": 3.2109230769230765e-05,
"loss": 1.145,
"step": 11300
},
{
"epoch": 31.23,
"learning_rate": 3.1647692307692304e-05,
"loss": 1.1388,
"step": 11400
},
{
"epoch": 31.51,
"learning_rate": 3.118615384615384e-05,
"loss": 1.1414,
"step": 11500
},
{
"epoch": 31.51,
"eval_loss": 0.3376125693321228,
"eval_runtime": 147.5034,
"eval_samples_per_second": 32.833,
"eval_steps_per_second": 4.108,
"eval_wer": 0.29398240136247517,
"step": 11500
},
{
"epoch": 31.78,
"learning_rate": 3.072461538461538e-05,
"loss": 1.1351,
"step": 11600
},
{
"epoch": 32.05,
"learning_rate": 3.026307692307692e-05,
"loss": 1.1228,
"step": 11700
},
{
"epoch": 32.33,
"learning_rate": 2.980153846153846e-05,
"loss": 1.1443,
"step": 11800
},
{
"epoch": 32.6,
"learning_rate": 2.9339999999999997e-05,
"loss": 1.1335,
"step": 11900
},
{
"epoch": 32.88,
"learning_rate": 2.8878461538461536e-05,
"loss": 1.1402,
"step": 12000
},
{
"epoch": 32.88,
"eval_loss": 0.3370179235935211,
"eval_runtime": 146.1673,
"eval_samples_per_second": 33.133,
"eval_steps_per_second": 4.146,
"eval_wer": 0.2891285835935282,
"step": 12000
},
{
"epoch": 33.15,
"learning_rate": 2.8416923076923075e-05,
"loss": 1.1435,
"step": 12100
},
{
"epoch": 33.42,
"learning_rate": 2.795538461538461e-05,
"loss": 1.1122,
"step": 12200
},
{
"epoch": 33.7,
"learning_rate": 2.7493846153846154e-05,
"loss": 1.1107,
"step": 12300
},
{
"epoch": 33.97,
"learning_rate": 2.703230769230769e-05,
"loss": 1.1126,
"step": 12400
},
{
"epoch": 34.25,
"learning_rate": 2.657076923076923e-05,
"loss": 1.1213,
"step": 12500
},
{
"epoch": 34.25,
"eval_loss": 0.3200843334197998,
"eval_runtime": 146.0978,
"eval_samples_per_second": 33.149,
"eval_steps_per_second": 4.148,
"eval_wer": 0.28742548963951176,
"step": 12500
},
{
"epoch": 34.52,
"learning_rate": 2.6109230769230768e-05,
"loss": 1.1115,
"step": 12600
},
{
"epoch": 34.79,
"learning_rate": 2.5647692307692304e-05,
"loss": 1.1069,
"step": 12700
},
{
"epoch": 35.07,
"learning_rate": 2.5186153846153843e-05,
"loss": 1.1154,
"step": 12800
},
{
"epoch": 35.34,
"learning_rate": 2.4729230769230764e-05,
"loss": 1.1079,
"step": 12900
},
{
"epoch": 35.62,
"learning_rate": 2.4267692307692307e-05,
"loss": 1.1207,
"step": 13000
},
{
"epoch": 35.62,
"eval_loss": 0.3260834515094757,
"eval_runtime": 147.1601,
"eval_samples_per_second": 32.91,
"eval_steps_per_second": 4.118,
"eval_wer": 0.28262844166903206,
"step": 13000
},
{
"epoch": 35.89,
"learning_rate": 2.3806153846153843e-05,
"loss": 1.1034,
"step": 13100
},
{
"epoch": 36.16,
"learning_rate": 2.3344615384615386e-05,
"loss": 1.1157,
"step": 13200
},
{
"epoch": 36.44,
"learning_rate": 2.288307692307692e-05,
"loss": 1.0868,
"step": 13300
},
{
"epoch": 36.71,
"learning_rate": 2.242153846153846e-05,
"loss": 1.0985,
"step": 13400
},
{
"epoch": 36.98,
"learning_rate": 2.196e-05,
"loss": 1.1074,
"step": 13500
},
{
"epoch": 36.98,
"eval_loss": 0.3117202818393707,
"eval_runtime": 145.4969,
"eval_samples_per_second": 33.286,
"eval_steps_per_second": 4.165,
"eval_wer": 0.2785977859778598,
"step": 13500
},
{
"epoch": 37.26,
"learning_rate": 2.1498461538461536e-05,
"loss": 1.0955,
"step": 13600
},
{
"epoch": 37.53,
"learning_rate": 2.1036923076923075e-05,
"loss": 1.0826,
"step": 13700
},
{
"epoch": 37.81,
"learning_rate": 2.0575384615384614e-05,
"loss": 1.0897,
"step": 13800
},
{
"epoch": 38.08,
"learning_rate": 2.0113846153846153e-05,
"loss": 1.0992,
"step": 13900
},
{
"epoch": 38.36,
"learning_rate": 1.965230769230769e-05,
"loss": 1.0818,
"step": 14000
},
{
"epoch": 38.36,
"eval_loss": 0.3194292485713959,
"eval_runtime": 145.4744,
"eval_samples_per_second": 33.291,
"eval_steps_per_second": 4.166,
"eval_wer": 0.2775759296054499,
"step": 14000
},
{
"epoch": 38.63,
"learning_rate": 1.919076923076923e-05,
"loss": 1.0812,
"step": 14100
},
{
"epoch": 38.9,
"learning_rate": 1.8729230769230768e-05,
"loss": 1.0851,
"step": 14200
},
{
"epoch": 39.18,
"learning_rate": 1.8267692307692307e-05,
"loss": 1.0824,
"step": 14300
},
{
"epoch": 39.45,
"learning_rate": 1.7806153846153843e-05,
"loss": 1.1003,
"step": 14400
},
{
"epoch": 39.73,
"learning_rate": 1.7344615384615382e-05,
"loss": 1.0889,
"step": 14500
},
{
"epoch": 39.73,
"eval_loss": 0.31878727674484253,
"eval_runtime": 145.398,
"eval_samples_per_second": 33.309,
"eval_steps_per_second": 4.168,
"eval_wer": 0.2738291229066137,
"step": 14500
},
{
"epoch": 40.0,
"learning_rate": 1.688307692307692e-05,
"loss": 1.0847,
"step": 14600
},
{
"epoch": 40.27,
"learning_rate": 1.642153846153846e-05,
"loss": 1.0707,
"step": 14700
},
{
"epoch": 40.55,
"learning_rate": 1.5959999999999996e-05,
"loss": 1.0664,
"step": 14800
},
{
"epoch": 40.82,
"learning_rate": 1.5498461538461536e-05,
"loss": 1.0814,
"step": 14900
},
{
"epoch": 41.1,
"learning_rate": 1.5036923076923075e-05,
"loss": 1.0672,
"step": 15000
},
{
"epoch": 41.1,
"eval_loss": 0.31958457827568054,
"eval_runtime": 145.6438,
"eval_samples_per_second": 33.252,
"eval_steps_per_second": 4.161,
"eval_wer": 0.277348850411581,
"step": 15000
},
{
"epoch": 41.37,
"learning_rate": 1.4575384615384614e-05,
"loss": 1.0709,
"step": 15100
},
{
"epoch": 41.64,
"learning_rate": 1.4113846153846153e-05,
"loss": 1.0833,
"step": 15200
},
{
"epoch": 41.92,
"learning_rate": 1.365230769230769e-05,
"loss": 1.0694,
"step": 15300
},
{
"epoch": 42.19,
"learning_rate": 1.3190769230769228e-05,
"loss": 1.063,
"step": 15400
},
{
"epoch": 42.47,
"learning_rate": 1.2729230769230768e-05,
"loss": 1.0838,
"step": 15500
},
{
"epoch": 42.47,
"eval_loss": 0.3129686713218689,
"eval_runtime": 146.1643,
"eval_samples_per_second": 33.134,
"eval_steps_per_second": 4.146,
"eval_wer": 0.2739142776043145,
"step": 15500
},
{
"epoch": 42.74,
"learning_rate": 1.2267692307692307e-05,
"loss": 1.0469,
"step": 15600
},
{
"epoch": 43.01,
"learning_rate": 1.181076923076923e-05,
"loss": 1.0746,
"step": 15700
},
{
"epoch": 43.29,
"learning_rate": 1.1349230769230769e-05,
"loss": 1.0495,
"step": 15800
},
{
"epoch": 43.56,
"learning_rate": 1.0887692307692306e-05,
"loss": 1.0553,
"step": 15900
},
{
"epoch": 43.83,
"learning_rate": 1.0426153846153846e-05,
"loss": 1.0553,
"step": 16000
},
{
"epoch": 43.83,
"eval_loss": 0.3164559602737427,
"eval_runtime": 146.1502,
"eval_samples_per_second": 33.137,
"eval_steps_per_second": 4.146,
"eval_wer": 0.27039455009934715,
"step": 16000
},
{
"epoch": 44.11,
"learning_rate": 9.964615384615385e-06,
"loss": 1.0537,
"step": 16100
},
{
"epoch": 44.38,
"learning_rate": 9.503076923076922e-06,
"loss": 1.049,
"step": 16200
},
{
"epoch": 44.66,
"learning_rate": 9.04153846153846e-06,
"loss": 1.0517,
"step": 16300
},
{
"epoch": 44.93,
"learning_rate": 8.58e-06,
"loss": 1.036,
"step": 16400
},
{
"epoch": 45.21,
"learning_rate": 8.118461538461538e-06,
"loss": 1.0786,
"step": 16500
},
{
"epoch": 45.21,
"eval_loss": 0.31082117557525635,
"eval_runtime": 146.4144,
"eval_samples_per_second": 33.077,
"eval_steps_per_second": 4.139,
"eval_wer": 0.270621629293216,
"step": 16500
},
{
"epoch": 45.48,
"learning_rate": 7.656923076923076e-06,
"loss": 1.0544,
"step": 16600
},
{
"epoch": 45.75,
"learning_rate": 7.195384615384615e-06,
"loss": 1.0702,
"step": 16700
},
{
"epoch": 46.03,
"learning_rate": 6.733846153846153e-06,
"loss": 1.035,
"step": 16800
},
{
"epoch": 46.3,
"learning_rate": 6.272307692307692e-06,
"loss": 1.0465,
"step": 16900
},
{
"epoch": 46.57,
"learning_rate": 5.81076923076923e-06,
"loss": 1.0546,
"step": 17000
},
{
"epoch": 46.57,
"eval_loss": 0.31021571159362793,
"eval_runtime": 145.9532,
"eval_samples_per_second": 33.182,
"eval_steps_per_second": 4.152,
"eval_wer": 0.267726369571388,
"step": 17000
},
{
"epoch": 46.85,
"learning_rate": 5.349230769230769e-06,
"loss": 1.0473,
"step": 17100
},
{
"epoch": 47.12,
"learning_rate": 4.887692307692307e-06,
"loss": 1.0422,
"step": 17200
},
{
"epoch": 47.4,
"learning_rate": 4.4261538461538455e-06,
"loss": 1.0418,
"step": 17300
},
{
"epoch": 47.67,
"learning_rate": 3.964615384615384e-06,
"loss": 1.0509,
"step": 17400
},
{
"epoch": 47.94,
"learning_rate": 3.5030769230769227e-06,
"loss": 1.0425,
"step": 17500
},
{
"epoch": 47.94,
"eval_loss": 0.3115251362323761,
"eval_runtime": 146.7424,
"eval_samples_per_second": 33.003,
"eval_steps_per_second": 4.13,
"eval_wer": 0.26792506386602327,
"step": 17500
},
{
"epoch": 48.22,
"learning_rate": 3.0415384615384615e-06,
"loss": 1.0463,
"step": 17600
},
{
"epoch": 48.49,
"learning_rate": 2.58e-06,
"loss": 1.0443,
"step": 17700
},
{
"epoch": 48.77,
"learning_rate": 2.1230769230769228e-06,
"loss": 1.0326,
"step": 17800
},
{
"epoch": 49.04,
"learning_rate": 1.6615384615384613e-06,
"loss": 1.0675,
"step": 17900
},
{
"epoch": 49.31,
"learning_rate": 1.2e-06,
"loss": 1.0398,
"step": 18000
},
{
"epoch": 49.31,
"eval_loss": 0.31313779950141907,
"eval_runtime": 145.9419,
"eval_samples_per_second": 33.184,
"eval_steps_per_second": 4.152,
"eval_wer": 0.2665909736020437,
"step": 18000
},
{
"epoch": 49.59,
"learning_rate": 7.384615384615384e-07,
"loss": 1.017,
"step": 18100
},
{
"epoch": 49.86,
"learning_rate": 2.769230769230769e-07,
"loss": 1.0519,
"step": 18200
},
{
"epoch": 50.0,
"step": 18250,
"total_flos": 5.432980623924042e+19,
"train_loss": 1.4442799015829009,
"train_runtime": 29714.5027,
"train_samples_per_second": 19.671,
"train_steps_per_second": 0.614
}
],
"max_steps": 18250,
"num_train_epochs": 50,
"total_flos": 5.432980623924042e+19,
"trial_name": null,
"trial_params": null
}