whisper-sm-farsipal-e5 / trainer_state.json
emilios's picture
End of training
e47ca00
raw
history blame
90.5 kB
{
"best_metric": 17.199108469539375,
"best_model_checkpoint": "./checkpoint-16000",
"epoch": 48.850746268656714,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 5.045361133432068e-07,
"loss": 0.3353,
"step": 25
},
{
"epoch": 0.12,
"learning_rate": 6.229195710491767e-07,
"loss": 0.2594,
"step": 50
},
{
"epoch": 0.19,
"learning_rate": 6.903829450223391e-07,
"loss": 0.2769,
"step": 75
},
{
"epoch": 0.25,
"learning_rate": 7.377725845391017e-07,
"loss": 0.2178,
"step": 100
},
{
"epoch": 0.31,
"learning_rate": 7.743343231239582e-07,
"loss": 0.2481,
"step": 125
},
{
"epoch": 0.37,
"learning_rate": 8.041073861170493e-07,
"loss": 0.2296,
"step": 150
},
{
"epoch": 0.44,
"learning_rate": 8.292222957399572e-07,
"loss": 0.2074,
"step": 175
},
{
"epoch": 0.5,
"learning_rate": 8.509413541357753e-07,
"loss": 0.2068,
"step": 200
},
{
"epoch": 0.56,
"learning_rate": 8.700744577655555e-07,
"loss": 0.2374,
"step": 225
},
{
"epoch": 0.62,
"learning_rate": 8.871723942761202e-07,
"loss": 0.2112,
"step": 250
},
{
"epoch": 0.68,
"learning_rate": 9.026267958246847e-07,
"loss": 0.2249,
"step": 275
},
{
"epoch": 0.75,
"learning_rate": 9.167261066633988e-07,
"loss": 0.1879,
"step": 300
},
{
"epoch": 0.81,
"learning_rate": 9.296889251455014e-07,
"loss": 0.2049,
"step": 325
},
{
"epoch": 0.87,
"learning_rate": 9.416848797368691e-07,
"loss": 0.1943,
"step": 350
},
{
"epoch": 0.93,
"learning_rate": 9.528482449516371e-07,
"loss": 0.1973,
"step": 375
},
{
"epoch": 1.0,
"learning_rate": 9.632871309784313e-07,
"loss": 0.1909,
"step": 400
},
{
"epoch": 1.06,
"learning_rate": 9.730898687853907e-07,
"loss": 0.2019,
"step": 425
},
{
"epoch": 1.12,
"learning_rate": 9.823295589572114e-07,
"loss": 0.1806,
"step": 450
},
{
"epoch": 1.18,
"learning_rate": 9.910673836465484e-07,
"loss": 0.1779,
"step": 475
},
{
"epoch": 1.24,
"learning_rate": 9.993550644973805e-07,
"loss": 0.1745,
"step": 500
},
{
"epoch": 1.31,
"learning_rate": 1e-06,
"loss": 0.1509,
"step": 525
},
{
"epoch": 1.37,
"learning_rate": 1e-06,
"loss": 0.1918,
"step": 550
},
{
"epoch": 1.43,
"learning_rate": 1e-06,
"loss": 0.1796,
"step": 575
},
{
"epoch": 1.49,
"learning_rate": 1e-06,
"loss": 0.2007,
"step": 600
},
{
"epoch": 1.55,
"learning_rate": 1e-06,
"loss": 0.1637,
"step": 625
},
{
"epoch": 1.62,
"learning_rate": 1e-06,
"loss": 0.1617,
"step": 650
},
{
"epoch": 1.68,
"learning_rate": 1e-06,
"loss": 0.1556,
"step": 675
},
{
"epoch": 1.74,
"learning_rate": 1e-06,
"loss": 0.1857,
"step": 700
},
{
"epoch": 1.8,
"learning_rate": 1e-06,
"loss": 0.1498,
"step": 725
},
{
"epoch": 1.87,
"learning_rate": 1e-06,
"loss": 0.1563,
"step": 750
},
{
"epoch": 1.93,
"learning_rate": 1e-06,
"loss": 0.1568,
"step": 775
},
{
"epoch": 1.99,
"learning_rate": 1e-06,
"loss": 0.1693,
"step": 800
},
{
"epoch": 2.05,
"learning_rate": 1e-06,
"loss": 0.1776,
"step": 825
},
{
"epoch": 2.11,
"learning_rate": 1e-06,
"loss": 0.1594,
"step": 850
},
{
"epoch": 2.18,
"learning_rate": 1e-06,
"loss": 0.1356,
"step": 875
},
{
"epoch": 2.24,
"learning_rate": 1e-06,
"loss": 0.1629,
"step": 900
},
{
"epoch": 2.3,
"learning_rate": 1e-06,
"loss": 0.1655,
"step": 925
},
{
"epoch": 2.36,
"learning_rate": 1e-06,
"loss": 0.1624,
"step": 950
},
{
"epoch": 2.43,
"learning_rate": 1e-06,
"loss": 0.1435,
"step": 975
},
{
"epoch": 2.49,
"learning_rate": 1e-06,
"loss": 0.1259,
"step": 1000
},
{
"epoch": 2.49,
"eval_loss": 0.4833984375,
"eval_runtime": 168.2921,
"eval_samples_per_second": 10.078,
"eval_steps_per_second": 0.63,
"eval_wer": 18.36924219910847,
"step": 1000
},
{
"epoch": 2.06,
"learning_rate": 1e-06,
"loss": 0.1452,
"step": 1025
},
{
"epoch": 2.12,
"learning_rate": 1e-06,
"loss": 0.1351,
"step": 1050
},
{
"epoch": 2.19,
"learning_rate": 1e-06,
"loss": 0.1642,
"step": 1075
},
{
"epoch": 2.25,
"learning_rate": 1e-06,
"loss": 0.1298,
"step": 1100
},
{
"epoch": 2.31,
"learning_rate": 1e-06,
"loss": 0.1491,
"step": 1125
},
{
"epoch": 2.37,
"learning_rate": 1e-06,
"loss": 0.143,
"step": 1150
},
{
"epoch": 2.44,
"learning_rate": 1e-06,
"loss": 0.1376,
"step": 1175
},
{
"epoch": 2.5,
"learning_rate": 1e-06,
"loss": 0.1408,
"step": 1200
},
{
"epoch": 2.56,
"learning_rate": 1e-06,
"loss": 0.161,
"step": 1225
},
{
"epoch": 2.62,
"learning_rate": 1e-06,
"loss": 0.1466,
"step": 1250
},
{
"epoch": 2.68,
"learning_rate": 1e-06,
"loss": 0.1535,
"step": 1275
},
{
"epoch": 2.75,
"learning_rate": 1e-06,
"loss": 0.1325,
"step": 1300
},
{
"epoch": 2.81,
"learning_rate": 1e-06,
"loss": 0.1426,
"step": 1325
},
{
"epoch": 2.87,
"learning_rate": 1e-06,
"loss": 0.1365,
"step": 1350
},
{
"epoch": 2.93,
"learning_rate": 1e-06,
"loss": 0.1407,
"step": 1375
},
{
"epoch": 3.0,
"learning_rate": 1e-06,
"loss": 0.141,
"step": 1400
},
{
"epoch": 3.06,
"learning_rate": 1e-06,
"loss": 0.1449,
"step": 1425
},
{
"epoch": 3.12,
"learning_rate": 1e-06,
"loss": 0.1335,
"step": 1450
},
{
"epoch": 3.18,
"learning_rate": 1e-06,
"loss": 0.1304,
"step": 1475
},
{
"epoch": 3.24,
"learning_rate": 1e-06,
"loss": 0.1277,
"step": 1500
},
{
"epoch": 3.31,
"learning_rate": 1e-06,
"loss": 0.1107,
"step": 1525
},
{
"epoch": 3.37,
"learning_rate": 1e-06,
"loss": 0.1462,
"step": 1550
},
{
"epoch": 3.43,
"learning_rate": 1e-06,
"loss": 0.1399,
"step": 1575
},
{
"epoch": 3.49,
"learning_rate": 1e-06,
"loss": 0.1422,
"step": 1600
},
{
"epoch": 3.55,
"learning_rate": 1e-06,
"loss": 0.1211,
"step": 1625
},
{
"epoch": 3.62,
"learning_rate": 1e-06,
"loss": 0.1231,
"step": 1650
},
{
"epoch": 3.68,
"learning_rate": 1e-06,
"loss": 0.1151,
"step": 1675
},
{
"epoch": 3.74,
"learning_rate": 1e-06,
"loss": 0.1436,
"step": 1700
},
{
"epoch": 3.8,
"learning_rate": 1e-06,
"loss": 0.1178,
"step": 1725
},
{
"epoch": 3.87,
"learning_rate": 1e-06,
"loss": 0.1201,
"step": 1750
},
{
"epoch": 3.93,
"learning_rate": 1e-06,
"loss": 0.1237,
"step": 1775
},
{
"epoch": 3.99,
"learning_rate": 1e-06,
"loss": 0.1331,
"step": 1800
},
{
"epoch": 4.05,
"learning_rate": 1e-06,
"loss": 0.1412,
"step": 1825
},
{
"epoch": 4.11,
"learning_rate": 1e-06,
"loss": 0.1262,
"step": 1850
},
{
"epoch": 4.18,
"learning_rate": 1e-06,
"loss": 0.1042,
"step": 1875
},
{
"epoch": 4.24,
"learning_rate": 1e-06,
"loss": 0.1227,
"step": 1900
},
{
"epoch": 4.3,
"learning_rate": 1e-06,
"loss": 0.126,
"step": 1925
},
{
"epoch": 4.36,
"learning_rate": 1e-06,
"loss": 0.1301,
"step": 1950
},
{
"epoch": 4.43,
"learning_rate": 1e-06,
"loss": 0.1089,
"step": 1975
},
{
"epoch": 4.49,
"learning_rate": 1e-06,
"loss": 0.1002,
"step": 2000
},
{
"epoch": 4.49,
"eval_loss": 0.46044921875,
"eval_runtime": 168.0874,
"eval_samples_per_second": 10.09,
"eval_steps_per_second": 0.631,
"eval_wer": 17.802748885586926,
"step": 2000
},
{
"epoch": 4.55,
"learning_rate": 1e-06,
"loss": 0.1021,
"step": 2025
},
{
"epoch": 4.61,
"learning_rate": 1e-06,
"loss": 0.1275,
"step": 2050
},
{
"epoch": 4.67,
"learning_rate": 1e-06,
"loss": 0.1281,
"step": 2075
},
{
"epoch": 4.74,
"learning_rate": 1e-06,
"loss": 0.131,
"step": 2100
},
{
"epoch": 4.8,
"learning_rate": 1e-06,
"loss": 0.107,
"step": 2125
},
{
"epoch": 4.86,
"learning_rate": 1e-06,
"loss": 0.1257,
"step": 2150
},
{
"epoch": 4.92,
"learning_rate": 1e-06,
"loss": 0.1182,
"step": 2175
},
{
"epoch": 4.99,
"learning_rate": 1e-06,
"loss": 0.1213,
"step": 2200
},
{
"epoch": 5.05,
"learning_rate": 1e-06,
"loss": 0.1106,
"step": 2225
},
{
"epoch": 5.11,
"learning_rate": 1e-06,
"loss": 0.1111,
"step": 2250
},
{
"epoch": 5.17,
"learning_rate": 1e-06,
"loss": 0.1192,
"step": 2275
},
{
"epoch": 5.23,
"learning_rate": 1e-06,
"loss": 0.1076,
"step": 2300
},
{
"epoch": 5.3,
"learning_rate": 1e-06,
"loss": 0.1187,
"step": 2325
},
{
"epoch": 5.36,
"learning_rate": 1e-06,
"loss": 0.12,
"step": 2350
},
{
"epoch": 5.42,
"learning_rate": 1e-06,
"loss": 0.1053,
"step": 2375
},
{
"epoch": 5.48,
"learning_rate": 1e-06,
"loss": 0.106,
"step": 2400
},
{
"epoch": 5.54,
"learning_rate": 1e-06,
"loss": 0.1108,
"step": 2425
},
{
"epoch": 5.61,
"learning_rate": 1e-06,
"loss": 0.1278,
"step": 2450
},
{
"epoch": 5.67,
"learning_rate": 1e-06,
"loss": 0.1233,
"step": 2475
},
{
"epoch": 5.73,
"learning_rate": 1e-06,
"loss": 0.1041,
"step": 2500
},
{
"epoch": 5.79,
"learning_rate": 1e-06,
"loss": 0.1018,
"step": 2525
},
{
"epoch": 5.86,
"learning_rate": 1e-06,
"loss": 0.0957,
"step": 2550
},
{
"epoch": 5.92,
"learning_rate": 1e-06,
"loss": 0.1136,
"step": 2575
},
{
"epoch": 5.98,
"learning_rate": 1e-06,
"loss": 0.1133,
"step": 2600
},
{
"epoch": 6.04,
"learning_rate": 1e-06,
"loss": 0.1157,
"step": 2625
},
{
"epoch": 6.1,
"learning_rate": 1e-06,
"loss": 0.1011,
"step": 2650
},
{
"epoch": 6.17,
"learning_rate": 1e-06,
"loss": 0.0926,
"step": 2675
},
{
"epoch": 6.23,
"learning_rate": 1e-06,
"loss": 0.1116,
"step": 2700
},
{
"epoch": 6.29,
"learning_rate": 1e-06,
"loss": 0.0931,
"step": 2725
},
{
"epoch": 6.35,
"learning_rate": 1e-06,
"loss": 0.098,
"step": 2750
},
{
"epoch": 6.42,
"learning_rate": 1e-06,
"loss": 0.0973,
"step": 2775
},
{
"epoch": 6.48,
"learning_rate": 1e-06,
"loss": 0.1092,
"step": 2800
},
{
"epoch": 6.54,
"learning_rate": 1e-06,
"loss": 0.1118,
"step": 2825
},
{
"epoch": 6.6,
"learning_rate": 1e-06,
"loss": 0.1032,
"step": 2850
},
{
"epoch": 6.66,
"learning_rate": 1e-06,
"loss": 0.098,
"step": 2875
},
{
"epoch": 6.73,
"learning_rate": 1e-06,
"loss": 0.1111,
"step": 2900
},
{
"epoch": 6.79,
"learning_rate": 1e-06,
"loss": 0.1102,
"step": 2925
},
{
"epoch": 6.85,
"learning_rate": 1e-06,
"loss": 0.094,
"step": 2950
},
{
"epoch": 6.91,
"learning_rate": 1e-06,
"loss": 0.1086,
"step": 2975
},
{
"epoch": 6.98,
"learning_rate": 1e-06,
"loss": 0.1096,
"step": 3000
},
{
"epoch": 6.98,
"eval_loss": 0.455322265625,
"eval_runtime": 167.3332,
"eval_samples_per_second": 10.135,
"eval_steps_per_second": 0.633,
"eval_wer": 17.87704309063893,
"step": 3000
},
{
"epoch": 7.04,
"learning_rate": 1e-06,
"loss": 0.0979,
"step": 3025
},
{
"epoch": 7.1,
"learning_rate": 1e-06,
"loss": 0.0971,
"step": 3050
},
{
"epoch": 7.16,
"learning_rate": 1e-06,
"loss": 0.1025,
"step": 3075
},
{
"epoch": 7.22,
"learning_rate": 1e-06,
"loss": 0.0848,
"step": 3100
},
{
"epoch": 7.29,
"learning_rate": 1e-06,
"loss": 0.1134,
"step": 3125
},
{
"epoch": 7.35,
"learning_rate": 1e-06,
"loss": 0.0928,
"step": 3150
},
{
"epoch": 7.41,
"learning_rate": 1e-06,
"loss": 0.0959,
"step": 3175
},
{
"epoch": 7.47,
"learning_rate": 1e-06,
"loss": 0.0994,
"step": 3200
},
{
"epoch": 7.53,
"learning_rate": 1e-06,
"loss": 0.1094,
"step": 3225
},
{
"epoch": 7.6,
"learning_rate": 1e-06,
"loss": 0.0954,
"step": 3250
},
{
"epoch": 7.66,
"learning_rate": 1e-06,
"loss": 0.0914,
"step": 3275
},
{
"epoch": 7.72,
"learning_rate": 1e-06,
"loss": 0.0901,
"step": 3300
},
{
"epoch": 7.78,
"learning_rate": 1e-06,
"loss": 0.1004,
"step": 3325
},
{
"epoch": 7.85,
"learning_rate": 1e-06,
"loss": 0.0985,
"step": 3350
},
{
"epoch": 7.91,
"learning_rate": 1e-06,
"loss": 0.111,
"step": 3375
},
{
"epoch": 7.97,
"learning_rate": 1e-06,
"loss": 0.0901,
"step": 3400
},
{
"epoch": 8.03,
"learning_rate": 1e-06,
"loss": 0.0897,
"step": 3425
},
{
"epoch": 8.09,
"learning_rate": 1e-06,
"loss": 0.0865,
"step": 3450
},
{
"epoch": 8.16,
"learning_rate": 1e-06,
"loss": 0.0944,
"step": 3475
},
{
"epoch": 8.22,
"learning_rate": 1e-06,
"loss": 0.1072,
"step": 3500
},
{
"epoch": 8.28,
"learning_rate": 1e-06,
"loss": 0.0896,
"step": 3525
},
{
"epoch": 8.34,
"learning_rate": 1e-06,
"loss": 0.0963,
"step": 3550
},
{
"epoch": 8.41,
"learning_rate": 1e-06,
"loss": 0.0897,
"step": 3575
},
{
"epoch": 8.47,
"learning_rate": 1e-06,
"loss": 0.0806,
"step": 3600
},
{
"epoch": 8.53,
"learning_rate": 1e-06,
"loss": 0.1005,
"step": 3625
},
{
"epoch": 8.59,
"learning_rate": 1e-06,
"loss": 0.083,
"step": 3650
},
{
"epoch": 8.65,
"learning_rate": 1e-06,
"loss": 0.0957,
"step": 3675
},
{
"epoch": 8.72,
"learning_rate": 1e-06,
"loss": 0.0897,
"step": 3700
},
{
"epoch": 8.78,
"learning_rate": 1e-06,
"loss": 0.1062,
"step": 3725
},
{
"epoch": 8.84,
"learning_rate": 1e-06,
"loss": 0.0846,
"step": 3750
},
{
"epoch": 8.9,
"learning_rate": 1e-06,
"loss": 0.0937,
"step": 3775
},
{
"epoch": 8.97,
"learning_rate": 1e-06,
"loss": 0.0826,
"step": 3800
},
{
"epoch": 9.03,
"learning_rate": 1e-06,
"loss": 0.0857,
"step": 3825
},
{
"epoch": 9.09,
"learning_rate": 1e-06,
"loss": 0.0873,
"step": 3850
},
{
"epoch": 9.15,
"learning_rate": 1e-06,
"loss": 0.099,
"step": 3875
},
{
"epoch": 9.21,
"learning_rate": 1e-06,
"loss": 0.0895,
"step": 3900
},
{
"epoch": 9.28,
"learning_rate": 1e-06,
"loss": 0.0841,
"step": 3925
},
{
"epoch": 9.34,
"learning_rate": 1e-06,
"loss": 0.0771,
"step": 3950
},
{
"epoch": 9.4,
"learning_rate": 1e-06,
"loss": 0.0903,
"step": 3975
},
{
"epoch": 9.46,
"learning_rate": 1e-06,
"loss": 0.0885,
"step": 4000
},
{
"epoch": 9.46,
"eval_loss": 0.455078125,
"eval_runtime": 168.2379,
"eval_samples_per_second": 10.081,
"eval_steps_per_second": 0.63,
"eval_wer": 17.960624071322435,
"step": 4000
},
{
"epoch": 9.52,
"learning_rate": 1e-06,
"loss": 0.0936,
"step": 4025
},
{
"epoch": 9.59,
"learning_rate": 1e-06,
"loss": 0.0765,
"step": 4050
},
{
"epoch": 9.65,
"learning_rate": 1e-06,
"loss": 0.0946,
"step": 4075
},
{
"epoch": 9.71,
"learning_rate": 1e-06,
"loss": 0.0856,
"step": 4100
},
{
"epoch": 9.77,
"learning_rate": 1e-06,
"loss": 0.0928,
"step": 4125
},
{
"epoch": 9.84,
"learning_rate": 1e-06,
"loss": 0.0843,
"step": 4150
},
{
"epoch": 9.9,
"learning_rate": 1e-06,
"loss": 0.0824,
"step": 4175
},
{
"epoch": 9.96,
"learning_rate": 1e-06,
"loss": 0.0947,
"step": 4200
},
{
"epoch": 10.02,
"learning_rate": 1e-06,
"loss": 0.0724,
"step": 4225
},
{
"epoch": 10.08,
"learning_rate": 1e-06,
"loss": 0.086,
"step": 4250
},
{
"epoch": 10.15,
"learning_rate": 1e-06,
"loss": 0.085,
"step": 4275
},
{
"epoch": 10.21,
"learning_rate": 1e-06,
"loss": 0.0894,
"step": 4300
},
{
"epoch": 10.27,
"learning_rate": 1e-06,
"loss": 0.0889,
"step": 4325
},
{
"epoch": 10.33,
"learning_rate": 1e-06,
"loss": 0.0753,
"step": 4350
},
{
"epoch": 10.4,
"learning_rate": 1e-06,
"loss": 0.0767,
"step": 4375
},
{
"epoch": 10.46,
"learning_rate": 1e-06,
"loss": 0.0844,
"step": 4400
},
{
"epoch": 10.52,
"learning_rate": 1e-06,
"loss": 0.0781,
"step": 4425
},
{
"epoch": 10.58,
"learning_rate": 1e-06,
"loss": 0.0838,
"step": 4450
},
{
"epoch": 10.64,
"learning_rate": 1e-06,
"loss": 0.0793,
"step": 4475
},
{
"epoch": 10.71,
"learning_rate": 1e-06,
"loss": 0.0863,
"step": 4500
},
{
"epoch": 10.77,
"learning_rate": 1e-06,
"loss": 0.0747,
"step": 4525
},
{
"epoch": 10.83,
"learning_rate": 1e-06,
"loss": 0.0853,
"step": 4550
},
{
"epoch": 10.89,
"learning_rate": 1e-06,
"loss": 0.0798,
"step": 4575
},
{
"epoch": 10.96,
"learning_rate": 1e-06,
"loss": 0.0758,
"step": 4600
},
{
"epoch": 11.02,
"learning_rate": 1e-06,
"loss": 0.0873,
"step": 4625
},
{
"epoch": 11.08,
"learning_rate": 1e-06,
"loss": 0.0795,
"step": 4650
},
{
"epoch": 11.14,
"learning_rate": 1e-06,
"loss": 0.0859,
"step": 4675
},
{
"epoch": 11.2,
"learning_rate": 1e-06,
"loss": 0.0747,
"step": 4700
},
{
"epoch": 11.27,
"learning_rate": 1e-06,
"loss": 0.078,
"step": 4725
},
{
"epoch": 11.33,
"learning_rate": 1e-06,
"loss": 0.0705,
"step": 4750
},
{
"epoch": 11.39,
"learning_rate": 1e-06,
"loss": 0.0829,
"step": 4775
},
{
"epoch": 11.45,
"learning_rate": 1e-06,
"loss": 0.0793,
"step": 4800
},
{
"epoch": 11.51,
"learning_rate": 1e-06,
"loss": 0.0806,
"step": 4825
},
{
"epoch": 11.58,
"learning_rate": 1e-06,
"loss": 0.0932,
"step": 4850
},
{
"epoch": 11.64,
"learning_rate": 1e-06,
"loss": 0.0699,
"step": 4875
},
{
"epoch": 11.7,
"learning_rate": 1e-06,
"loss": 0.0749,
"step": 4900
},
{
"epoch": 11.76,
"learning_rate": 1e-06,
"loss": 0.0814,
"step": 4925
},
{
"epoch": 11.83,
"learning_rate": 1e-06,
"loss": 0.085,
"step": 4950
},
{
"epoch": 11.89,
"learning_rate": 1e-06,
"loss": 0.0767,
"step": 4975
},
{
"epoch": 11.95,
"learning_rate": 1e-06,
"loss": 0.0675,
"step": 5000
},
{
"epoch": 11.95,
"eval_loss": 0.463134765625,
"eval_runtime": 167.4714,
"eval_samples_per_second": 10.127,
"eval_steps_per_second": 0.633,
"eval_wer": 17.904903417533433,
"step": 5000
},
{
"epoch": 12.01,
"learning_rate": 1e-06,
"loss": 0.0769,
"step": 5025
},
{
"epoch": 12.07,
"learning_rate": 1e-06,
"loss": 0.0795,
"step": 5050
},
{
"epoch": 12.14,
"learning_rate": 1e-06,
"loss": 0.0744,
"step": 5075
},
{
"epoch": 12.2,
"learning_rate": 1e-06,
"loss": 0.0789,
"step": 5100
},
{
"epoch": 12.26,
"learning_rate": 1e-06,
"loss": 0.0672,
"step": 5125
},
{
"epoch": 12.32,
"learning_rate": 1e-06,
"loss": 0.072,
"step": 5150
},
{
"epoch": 12.39,
"learning_rate": 1e-06,
"loss": 0.0677,
"step": 5175
},
{
"epoch": 12.45,
"learning_rate": 1e-06,
"loss": 0.068,
"step": 5200
},
{
"epoch": 12.51,
"learning_rate": 1e-06,
"loss": 0.0646,
"step": 5225
},
{
"epoch": 12.57,
"learning_rate": 1e-06,
"loss": 0.0703,
"step": 5250
},
{
"epoch": 12.63,
"learning_rate": 1e-06,
"loss": 0.078,
"step": 5275
},
{
"epoch": 12.7,
"learning_rate": 1e-06,
"loss": 0.0714,
"step": 5300
},
{
"epoch": 12.76,
"learning_rate": 1e-06,
"loss": 0.0723,
"step": 5325
},
{
"epoch": 12.82,
"learning_rate": 1e-06,
"loss": 0.0871,
"step": 5350
},
{
"epoch": 12.88,
"learning_rate": 1e-06,
"loss": 0.0757,
"step": 5375
},
{
"epoch": 12.95,
"learning_rate": 1e-06,
"loss": 0.0835,
"step": 5400
},
{
"epoch": 13.01,
"learning_rate": 1e-06,
"loss": 0.0727,
"step": 5425
},
{
"epoch": 13.07,
"learning_rate": 1e-06,
"loss": 0.081,
"step": 5450
},
{
"epoch": 13.13,
"learning_rate": 1e-06,
"loss": 0.0642,
"step": 5475
},
{
"epoch": 13.19,
"learning_rate": 1e-06,
"loss": 0.066,
"step": 5500
},
{
"epoch": 13.26,
"learning_rate": 1e-06,
"loss": 0.0746,
"step": 5525
},
{
"epoch": 13.32,
"learning_rate": 1e-06,
"loss": 0.0725,
"step": 5550
},
{
"epoch": 13.38,
"learning_rate": 1e-06,
"loss": 0.0767,
"step": 5575
},
{
"epoch": 13.44,
"learning_rate": 1e-06,
"loss": 0.0731,
"step": 5600
},
{
"epoch": 13.5,
"learning_rate": 1e-06,
"loss": 0.0648,
"step": 5625
},
{
"epoch": 13.57,
"learning_rate": 1e-06,
"loss": 0.0686,
"step": 5650
},
{
"epoch": 13.63,
"learning_rate": 1e-06,
"loss": 0.0691,
"step": 5675
},
{
"epoch": 13.69,
"learning_rate": 1e-06,
"loss": 0.0787,
"step": 5700
},
{
"epoch": 13.75,
"learning_rate": 1e-06,
"loss": 0.0648,
"step": 5725
},
{
"epoch": 13.82,
"learning_rate": 1e-06,
"loss": 0.0659,
"step": 5750
},
{
"epoch": 13.88,
"learning_rate": 1e-06,
"loss": 0.0769,
"step": 5775
},
{
"epoch": 13.94,
"learning_rate": 1e-06,
"loss": 0.0596,
"step": 5800
},
{
"epoch": 14.0,
"learning_rate": 1e-06,
"loss": 0.0637,
"step": 5825
},
{
"epoch": 14.06,
"learning_rate": 1e-06,
"loss": 0.073,
"step": 5850
},
{
"epoch": 14.13,
"learning_rate": 1e-06,
"loss": 0.0579,
"step": 5875
},
{
"epoch": 14.19,
"learning_rate": 1e-06,
"loss": 0.07,
"step": 5900
},
{
"epoch": 14.25,
"learning_rate": 1e-06,
"loss": 0.0683,
"step": 5925
},
{
"epoch": 14.31,
"learning_rate": 1e-06,
"loss": 0.0585,
"step": 5950
},
{
"epoch": 14.38,
"learning_rate": 1e-06,
"loss": 0.069,
"step": 5975
},
{
"epoch": 14.44,
"learning_rate": 1e-06,
"loss": 0.0675,
"step": 6000
},
{
"epoch": 14.44,
"eval_loss": 0.4619140625,
"eval_runtime": 167.54,
"eval_samples_per_second": 10.123,
"eval_steps_per_second": 0.633,
"eval_wer": 17.904903417533433,
"step": 6000
},
{
"epoch": 14.5,
"learning_rate": 1e-06,
"loss": 0.0641,
"step": 6025
},
{
"epoch": 14.56,
"learning_rate": 1e-06,
"loss": 0.0646,
"step": 6050
},
{
"epoch": 14.62,
"learning_rate": 1e-06,
"loss": 0.0718,
"step": 6075
},
{
"epoch": 14.69,
"learning_rate": 1e-06,
"loss": 0.0644,
"step": 6100
},
{
"epoch": 14.75,
"learning_rate": 1e-06,
"loss": 0.0758,
"step": 6125
},
{
"epoch": 14.81,
"learning_rate": 1e-06,
"loss": 0.0598,
"step": 6150
},
{
"epoch": 14.87,
"learning_rate": 1e-06,
"loss": 0.0747,
"step": 6175
},
{
"epoch": 14.94,
"learning_rate": 1e-06,
"loss": 0.0604,
"step": 6200
},
{
"epoch": 15.0,
"learning_rate": 1e-06,
"loss": 0.0657,
"step": 6225
},
{
"epoch": 15.06,
"learning_rate": 1e-06,
"loss": 0.0584,
"step": 6250
},
{
"epoch": 15.12,
"learning_rate": 1e-06,
"loss": 0.0646,
"step": 6275
},
{
"epoch": 15.18,
"learning_rate": 1e-06,
"loss": 0.0597,
"step": 6300
},
{
"epoch": 15.25,
"learning_rate": 1e-06,
"loss": 0.0609,
"step": 6325
},
{
"epoch": 15.31,
"learning_rate": 1e-06,
"loss": 0.059,
"step": 6350
},
{
"epoch": 15.37,
"learning_rate": 1e-06,
"loss": 0.0605,
"step": 6375
},
{
"epoch": 15.43,
"learning_rate": 1e-06,
"loss": 0.0637,
"step": 6400
},
{
"epoch": 15.5,
"learning_rate": 1e-06,
"loss": 0.051,
"step": 6425
},
{
"epoch": 15.56,
"learning_rate": 1e-06,
"loss": 0.0607,
"step": 6450
},
{
"epoch": 15.62,
"learning_rate": 1e-06,
"loss": 0.068,
"step": 6475
},
{
"epoch": 15.68,
"learning_rate": 1e-06,
"loss": 0.0613,
"step": 6500
},
{
"epoch": 15.74,
"learning_rate": 1e-06,
"loss": 0.0701,
"step": 6525
},
{
"epoch": 15.81,
"learning_rate": 1e-06,
"loss": 0.0656,
"step": 6550
},
{
"epoch": 15.87,
"learning_rate": 1e-06,
"loss": 0.0665,
"step": 6575
},
{
"epoch": 15.93,
"learning_rate": 1e-06,
"loss": 0.0769,
"step": 6600
},
{
"epoch": 15.99,
"learning_rate": 1e-06,
"loss": 0.0629,
"step": 6625
},
{
"epoch": 16.05,
"learning_rate": 1e-06,
"loss": 0.0569,
"step": 6650
},
{
"epoch": 16.12,
"learning_rate": 1e-06,
"loss": 0.0587,
"step": 6675
},
{
"epoch": 16.18,
"learning_rate": 1e-06,
"loss": 0.0597,
"step": 6700
},
{
"epoch": 16.24,
"learning_rate": 1e-06,
"loss": 0.059,
"step": 6725
},
{
"epoch": 16.3,
"learning_rate": 1e-06,
"loss": 0.0556,
"step": 6750
},
{
"epoch": 16.37,
"learning_rate": 1e-06,
"loss": 0.0596,
"step": 6775
},
{
"epoch": 16.43,
"learning_rate": 1e-06,
"loss": 0.0598,
"step": 6800
},
{
"epoch": 16.49,
"learning_rate": 1e-06,
"loss": 0.0606,
"step": 6825
},
{
"epoch": 16.55,
"learning_rate": 1e-06,
"loss": 0.0616,
"step": 6850
},
{
"epoch": 16.61,
"learning_rate": 1e-06,
"loss": 0.0547,
"step": 6875
},
{
"epoch": 16.68,
"learning_rate": 1e-06,
"loss": 0.0564,
"step": 6900
},
{
"epoch": 16.74,
"learning_rate": 1e-06,
"loss": 0.0623,
"step": 6925
},
{
"epoch": 16.8,
"learning_rate": 1e-06,
"loss": 0.0597,
"step": 6950
},
{
"epoch": 16.86,
"learning_rate": 1e-06,
"loss": 0.0666,
"step": 6975
},
{
"epoch": 16.93,
"learning_rate": 1e-06,
"loss": 0.0645,
"step": 7000
},
{
"epoch": 16.93,
"eval_loss": 0.4677734375,
"eval_runtime": 167.4906,
"eval_samples_per_second": 10.126,
"eval_steps_per_second": 0.633,
"eval_wer": 17.672734026745914,
"step": 7000
},
{
"epoch": 16.99,
"learning_rate": 1e-06,
"loss": 0.0557,
"step": 7025
},
{
"epoch": 17.05,
"learning_rate": 1e-06,
"loss": 0.0678,
"step": 7050
},
{
"epoch": 17.11,
"learning_rate": 1e-06,
"loss": 0.0547,
"step": 7075
},
{
"epoch": 17.17,
"learning_rate": 1e-06,
"loss": 0.0533,
"step": 7100
},
{
"epoch": 17.24,
"learning_rate": 1e-06,
"loss": 0.0604,
"step": 7125
},
{
"epoch": 17.3,
"learning_rate": 1e-06,
"loss": 0.0587,
"step": 7150
},
{
"epoch": 17.36,
"learning_rate": 1e-06,
"loss": 0.0616,
"step": 7175
},
{
"epoch": 17.42,
"learning_rate": 1e-06,
"loss": 0.0585,
"step": 7200
},
{
"epoch": 17.49,
"learning_rate": 1e-06,
"loss": 0.0638,
"step": 7225
},
{
"epoch": 17.55,
"learning_rate": 1e-06,
"loss": 0.0586,
"step": 7250
},
{
"epoch": 17.61,
"learning_rate": 1e-06,
"loss": 0.0457,
"step": 7275
},
{
"epoch": 17.67,
"learning_rate": 1e-06,
"loss": 0.0531,
"step": 7300
},
{
"epoch": 17.73,
"learning_rate": 1e-06,
"loss": 0.0607,
"step": 7325
},
{
"epoch": 17.8,
"learning_rate": 1e-06,
"loss": 0.0564,
"step": 7350
},
{
"epoch": 17.86,
"learning_rate": 1e-06,
"loss": 0.0586,
"step": 7375
},
{
"epoch": 17.92,
"learning_rate": 1e-06,
"loss": 0.0647,
"step": 7400
},
{
"epoch": 17.98,
"learning_rate": 1e-06,
"loss": 0.0554,
"step": 7425
},
{
"epoch": 18.04,
"learning_rate": 1e-06,
"loss": 0.0558,
"step": 7450
},
{
"epoch": 18.11,
"learning_rate": 1e-06,
"loss": 0.0517,
"step": 7475
},
{
"epoch": 18.17,
"learning_rate": 1e-06,
"loss": 0.0507,
"step": 7500
},
{
"epoch": 18.23,
"learning_rate": 1e-06,
"loss": 0.063,
"step": 7525
},
{
"epoch": 18.29,
"learning_rate": 1e-06,
"loss": 0.0533,
"step": 7550
},
{
"epoch": 18.36,
"learning_rate": 1e-06,
"loss": 0.0666,
"step": 7575
},
{
"epoch": 18.42,
"learning_rate": 1e-06,
"loss": 0.0561,
"step": 7600
},
{
"epoch": 18.48,
"learning_rate": 1e-06,
"loss": 0.0545,
"step": 7625
},
{
"epoch": 18.54,
"learning_rate": 1e-06,
"loss": 0.0522,
"step": 7650
},
{
"epoch": 18.6,
"learning_rate": 1e-06,
"loss": 0.0527,
"step": 7675
},
{
"epoch": 18.67,
"learning_rate": 1e-06,
"loss": 0.049,
"step": 7700
},
{
"epoch": 18.73,
"learning_rate": 1e-06,
"loss": 0.0546,
"step": 7725
},
{
"epoch": 18.79,
"learning_rate": 1e-06,
"loss": 0.058,
"step": 7750
},
{
"epoch": 18.85,
"learning_rate": 1e-06,
"loss": 0.048,
"step": 7775
},
{
"epoch": 18.92,
"learning_rate": 1e-06,
"loss": 0.0585,
"step": 7800
},
{
"epoch": 18.98,
"learning_rate": 1e-06,
"loss": 0.0575,
"step": 7825
},
{
"epoch": 19.04,
"learning_rate": 1e-06,
"loss": 0.0629,
"step": 7850
},
{
"epoch": 19.1,
"learning_rate": 1e-06,
"loss": 0.0531,
"step": 7875
},
{
"epoch": 19.16,
"learning_rate": 1e-06,
"loss": 0.0581,
"step": 7900
},
{
"epoch": 19.23,
"learning_rate": 1e-06,
"loss": 0.0508,
"step": 7925
},
{
"epoch": 19.29,
"learning_rate": 1e-06,
"loss": 0.0531,
"step": 7950
},
{
"epoch": 19.35,
"learning_rate": 1e-06,
"loss": 0.0444,
"step": 7975
},
{
"epoch": 19.41,
"learning_rate": 1e-06,
"loss": 0.0535,
"step": 8000
},
{
"epoch": 19.41,
"eval_loss": 0.468505859375,
"eval_runtime": 167.6281,
"eval_samples_per_second": 10.118,
"eval_steps_per_second": 0.632,
"eval_wer": 17.663447251114412,
"step": 8000
},
{
"epoch": 19.06,
"learning_rate": 1e-06,
"loss": 0.0499,
"step": 8025
},
{
"epoch": 19.12,
"learning_rate": 1e-06,
"loss": 0.0509,
"step": 8050
},
{
"epoch": 19.19,
"learning_rate": 1e-06,
"loss": 0.057,
"step": 8075
},
{
"epoch": 19.25,
"learning_rate": 1e-06,
"loss": 0.0482,
"step": 8100
},
{
"epoch": 19.31,
"learning_rate": 1e-06,
"loss": 0.0537,
"step": 8125
},
{
"epoch": 19.37,
"learning_rate": 1e-06,
"loss": 0.0534,
"step": 8150
},
{
"epoch": 19.44,
"learning_rate": 1e-06,
"loss": 0.0506,
"step": 8175
},
{
"epoch": 19.5,
"learning_rate": 1e-06,
"loss": 0.0496,
"step": 8200
},
{
"epoch": 19.56,
"learning_rate": 1e-06,
"loss": 0.0569,
"step": 8225
},
{
"epoch": 19.62,
"learning_rate": 1e-06,
"loss": 0.054,
"step": 8250
},
{
"epoch": 19.68,
"learning_rate": 1e-06,
"loss": 0.0568,
"step": 8275
},
{
"epoch": 19.75,
"learning_rate": 1e-06,
"loss": 0.048,
"step": 8300
},
{
"epoch": 19.81,
"learning_rate": 1e-06,
"loss": 0.0514,
"step": 8325
},
{
"epoch": 19.87,
"learning_rate": 1e-06,
"loss": 0.0506,
"step": 8350
},
{
"epoch": 19.93,
"learning_rate": 1e-06,
"loss": 0.055,
"step": 8375
},
{
"epoch": 20.0,
"learning_rate": 1e-06,
"loss": 0.0497,
"step": 8400
},
{
"epoch": 20.06,
"learning_rate": 1e-06,
"loss": 0.0547,
"step": 8425
},
{
"epoch": 20.12,
"learning_rate": 1e-06,
"loss": 0.0474,
"step": 8450
},
{
"epoch": 20.18,
"learning_rate": 1e-06,
"loss": 0.0473,
"step": 8475
},
{
"epoch": 20.24,
"learning_rate": 1e-06,
"loss": 0.0474,
"step": 8500
},
{
"epoch": 20.31,
"learning_rate": 1e-06,
"loss": 0.0451,
"step": 8525
},
{
"epoch": 20.37,
"learning_rate": 1e-06,
"loss": 0.0552,
"step": 8550
},
{
"epoch": 20.43,
"learning_rate": 1e-06,
"loss": 0.0525,
"step": 8575
},
{
"epoch": 20.49,
"learning_rate": 1e-06,
"loss": 0.0533,
"step": 8600
},
{
"epoch": 20.55,
"learning_rate": 1e-06,
"loss": 0.0448,
"step": 8625
},
{
"epoch": 20.62,
"learning_rate": 1e-06,
"loss": 0.0458,
"step": 8650
},
{
"epoch": 20.68,
"learning_rate": 1e-06,
"loss": 0.0459,
"step": 8675
},
{
"epoch": 20.74,
"learning_rate": 1e-06,
"loss": 0.0561,
"step": 8700
},
{
"epoch": 20.8,
"learning_rate": 1e-06,
"loss": 0.0465,
"step": 8725
},
{
"epoch": 20.87,
"learning_rate": 1e-06,
"loss": 0.0482,
"step": 8750
},
{
"epoch": 20.93,
"learning_rate": 1e-06,
"loss": 0.0483,
"step": 8775
},
{
"epoch": 20.99,
"learning_rate": 1e-06,
"loss": 0.0505,
"step": 8800
},
{
"epoch": 21.05,
"learning_rate": 1e-06,
"loss": 0.0542,
"step": 8825
},
{
"epoch": 21.11,
"learning_rate": 1e-06,
"loss": 0.0501,
"step": 8850
},
{
"epoch": 21.18,
"learning_rate": 1e-06,
"loss": 0.0437,
"step": 8875
},
{
"epoch": 21.24,
"learning_rate": 1e-06,
"loss": 0.0495,
"step": 8900
},
{
"epoch": 21.3,
"learning_rate": 1e-06,
"loss": 0.0504,
"step": 8925
},
{
"epoch": 21.36,
"learning_rate": 1e-06,
"loss": 0.0492,
"step": 8950
},
{
"epoch": 21.43,
"learning_rate": 1e-06,
"loss": 0.0456,
"step": 8975
},
{
"epoch": 21.49,
"learning_rate": 1e-06,
"loss": 0.039,
"step": 9000
},
{
"epoch": 21.49,
"eval_loss": 0.474609375,
"eval_runtime": 166.7783,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 0.636,
"eval_wer": 17.672734026745914,
"step": 9000
},
{
"epoch": 21.55,
"learning_rate": 1e-06,
"loss": 0.0418,
"step": 9025
},
{
"epoch": 21.61,
"learning_rate": 1e-06,
"loss": 0.0516,
"step": 9050
},
{
"epoch": 21.67,
"learning_rate": 1e-06,
"loss": 0.0515,
"step": 9075
},
{
"epoch": 21.74,
"learning_rate": 1e-06,
"loss": 0.0494,
"step": 9100
},
{
"epoch": 21.8,
"learning_rate": 1e-06,
"loss": 0.0426,
"step": 9125
},
{
"epoch": 21.86,
"learning_rate": 1e-06,
"loss": 0.0511,
"step": 9150
},
{
"epoch": 21.92,
"learning_rate": 1e-06,
"loss": 0.0491,
"step": 9175
},
{
"epoch": 21.99,
"learning_rate": 1e-06,
"loss": 0.0479,
"step": 9200
},
{
"epoch": 22.05,
"learning_rate": 1e-06,
"loss": 0.0434,
"step": 9225
},
{
"epoch": 22.11,
"learning_rate": 1e-06,
"loss": 0.0447,
"step": 9250
},
{
"epoch": 22.17,
"learning_rate": 1e-06,
"loss": 0.0497,
"step": 9275
},
{
"epoch": 22.23,
"learning_rate": 1e-06,
"loss": 0.0418,
"step": 9300
},
{
"epoch": 22.3,
"learning_rate": 1e-06,
"loss": 0.0487,
"step": 9325
},
{
"epoch": 22.36,
"learning_rate": 1e-06,
"loss": 0.0507,
"step": 9350
},
{
"epoch": 22.42,
"learning_rate": 1e-06,
"loss": 0.0416,
"step": 9375
},
{
"epoch": 22.48,
"learning_rate": 1e-06,
"loss": 0.0433,
"step": 9400
},
{
"epoch": 22.54,
"learning_rate": 1e-06,
"loss": 0.0484,
"step": 9425
},
{
"epoch": 22.61,
"learning_rate": 1e-06,
"loss": 0.0499,
"step": 9450
},
{
"epoch": 22.67,
"learning_rate": 1e-06,
"loss": 0.0517,
"step": 9475
},
{
"epoch": 22.73,
"learning_rate": 1e-06,
"loss": 0.042,
"step": 9500
},
{
"epoch": 22.79,
"learning_rate": 1e-06,
"loss": 0.0419,
"step": 9525
},
{
"epoch": 22.86,
"learning_rate": 1e-06,
"loss": 0.0398,
"step": 9550
},
{
"epoch": 22.92,
"learning_rate": 1e-06,
"loss": 0.0452,
"step": 9575
},
{
"epoch": 22.98,
"learning_rate": 1e-06,
"loss": 0.0473,
"step": 9600
},
{
"epoch": 23.04,
"learning_rate": 1e-06,
"loss": 0.049,
"step": 9625
},
{
"epoch": 23.1,
"learning_rate": 1e-06,
"loss": 0.0422,
"step": 9650
},
{
"epoch": 23.17,
"learning_rate": 1e-06,
"loss": 0.0371,
"step": 9675
},
{
"epoch": 23.23,
"learning_rate": 1e-06,
"loss": 0.0455,
"step": 9700
},
{
"epoch": 23.29,
"learning_rate": 1e-06,
"loss": 0.0402,
"step": 9725
},
{
"epoch": 23.35,
"learning_rate": 1e-06,
"loss": 0.042,
"step": 9750
},
{
"epoch": 23.42,
"learning_rate": 1e-06,
"loss": 0.0454,
"step": 9775
},
{
"epoch": 23.48,
"learning_rate": 1e-06,
"loss": 0.0429,
"step": 9800
},
{
"epoch": 23.54,
"learning_rate": 1e-06,
"loss": 0.0468,
"step": 9825
},
{
"epoch": 23.6,
"learning_rate": 1e-06,
"loss": 0.0416,
"step": 9850
},
{
"epoch": 23.66,
"learning_rate": 1e-06,
"loss": 0.0445,
"step": 9875
},
{
"epoch": 23.73,
"learning_rate": 1e-06,
"loss": 0.0476,
"step": 9900
},
{
"epoch": 23.79,
"learning_rate": 1e-06,
"loss": 0.0446,
"step": 9925
},
{
"epoch": 23.85,
"learning_rate": 1e-06,
"loss": 0.0407,
"step": 9950
},
{
"epoch": 23.91,
"learning_rate": 1e-06,
"loss": 0.0453,
"step": 9975
},
{
"epoch": 23.98,
"learning_rate": 1e-06,
"loss": 0.0447,
"step": 10000
},
{
"epoch": 23.98,
"eval_loss": 0.47607421875,
"eval_runtime": 167.3024,
"eval_samples_per_second": 10.137,
"eval_steps_per_second": 0.634,
"eval_wer": 17.663447251114412,
"step": 10000
},
{
"epoch": 24.04,
"learning_rate": 1e-06,
"loss": 0.0424,
"step": 10025
},
{
"epoch": 24.1,
"learning_rate": 1e-06,
"loss": 0.0398,
"step": 10050
},
{
"epoch": 24.16,
"learning_rate": 1e-06,
"loss": 0.0469,
"step": 10075
},
{
"epoch": 24.22,
"learning_rate": 1e-06,
"loss": 0.0351,
"step": 10100
},
{
"epoch": 24.29,
"learning_rate": 1e-06,
"loss": 0.0484,
"step": 10125
},
{
"epoch": 24.35,
"learning_rate": 1e-06,
"loss": 0.0416,
"step": 10150
},
{
"epoch": 24.41,
"learning_rate": 1e-06,
"loss": 0.0417,
"step": 10175
},
{
"epoch": 24.47,
"learning_rate": 1e-06,
"loss": 0.043,
"step": 10200
},
{
"epoch": 24.53,
"learning_rate": 1e-06,
"loss": 0.0483,
"step": 10225
},
{
"epoch": 24.6,
"learning_rate": 1e-06,
"loss": 0.0428,
"step": 10250
},
{
"epoch": 24.66,
"learning_rate": 1e-06,
"loss": 0.0382,
"step": 10275
},
{
"epoch": 24.72,
"learning_rate": 1e-06,
"loss": 0.0412,
"step": 10300
},
{
"epoch": 24.78,
"learning_rate": 1e-06,
"loss": 0.0392,
"step": 10325
},
{
"epoch": 24.85,
"learning_rate": 1e-06,
"loss": 0.0417,
"step": 10350
},
{
"epoch": 24.91,
"learning_rate": 1e-06,
"loss": 0.0481,
"step": 10375
},
{
"epoch": 24.97,
"learning_rate": 1e-06,
"loss": 0.0397,
"step": 10400
},
{
"epoch": 25.03,
"learning_rate": 1e-06,
"loss": 0.0388,
"step": 10425
},
{
"epoch": 25.09,
"learning_rate": 1e-06,
"loss": 0.0366,
"step": 10450
},
{
"epoch": 25.16,
"learning_rate": 1e-06,
"loss": 0.0424,
"step": 10475
},
{
"epoch": 25.22,
"learning_rate": 1e-06,
"loss": 0.0443,
"step": 10500
},
{
"epoch": 25.28,
"learning_rate": 1e-06,
"loss": 0.037,
"step": 10525
},
{
"epoch": 25.34,
"learning_rate": 1e-06,
"loss": 0.0422,
"step": 10550
},
{
"epoch": 25.41,
"learning_rate": 1e-06,
"loss": 0.0388,
"step": 10575
},
{
"epoch": 25.47,
"learning_rate": 1e-06,
"loss": 0.0366,
"step": 10600
},
{
"epoch": 25.53,
"learning_rate": 1e-06,
"loss": 0.0447,
"step": 10625
},
{
"epoch": 25.59,
"learning_rate": 1e-06,
"loss": 0.036,
"step": 10650
},
{
"epoch": 25.65,
"learning_rate": 1e-06,
"loss": 0.0408,
"step": 10675
},
{
"epoch": 25.72,
"learning_rate": 1e-06,
"loss": 0.0384,
"step": 10700
},
{
"epoch": 25.78,
"learning_rate": 1e-06,
"loss": 0.0464,
"step": 10725
},
{
"epoch": 25.84,
"learning_rate": 1e-06,
"loss": 0.0365,
"step": 10750
},
{
"epoch": 25.9,
"learning_rate": 1e-06,
"loss": 0.0398,
"step": 10775
},
{
"epoch": 25.97,
"learning_rate": 1e-06,
"loss": 0.0391,
"step": 10800
},
{
"epoch": 26.03,
"learning_rate": 1e-06,
"loss": 0.0392,
"step": 10825
},
{
"epoch": 26.09,
"learning_rate": 1e-06,
"loss": 0.039,
"step": 10850
},
{
"epoch": 26.15,
"learning_rate": 1e-06,
"loss": 0.0413,
"step": 10875
},
{
"epoch": 26.21,
"learning_rate": 1e-06,
"loss": 0.0407,
"step": 10900
},
{
"epoch": 26.28,
"learning_rate": 1e-06,
"loss": 0.0375,
"step": 10925
},
{
"epoch": 26.34,
"learning_rate": 1e-06,
"loss": 0.0342,
"step": 10950
},
{
"epoch": 26.4,
"learning_rate": 1e-06,
"loss": 0.0382,
"step": 10975
},
{
"epoch": 26.46,
"learning_rate": 1e-06,
"loss": 0.0393,
"step": 11000
},
{
"epoch": 26.46,
"eval_loss": 0.479248046875,
"eval_runtime": 166.6895,
"eval_samples_per_second": 10.175,
"eval_steps_per_second": 0.636,
"eval_wer": 17.765601783060923,
"step": 11000
},
{
"epoch": 26.52,
"learning_rate": 1e-06,
"loss": 0.0419,
"step": 11025
},
{
"epoch": 26.59,
"learning_rate": 1e-06,
"loss": 0.0327,
"step": 11050
},
{
"epoch": 26.65,
"learning_rate": 1e-06,
"loss": 0.0419,
"step": 11075
},
{
"epoch": 26.71,
"learning_rate": 1e-06,
"loss": 0.0384,
"step": 11100
},
{
"epoch": 26.77,
"learning_rate": 1e-06,
"loss": 0.0432,
"step": 11125
},
{
"epoch": 26.84,
"learning_rate": 1e-06,
"loss": 0.0376,
"step": 11150
},
{
"epoch": 26.9,
"learning_rate": 1e-06,
"loss": 0.0378,
"step": 11175
},
{
"epoch": 26.96,
"learning_rate": 1e-06,
"loss": 0.0395,
"step": 11200
},
{
"epoch": 27.02,
"learning_rate": 1e-06,
"loss": 0.0337,
"step": 11225
},
{
"epoch": 27.08,
"learning_rate": 1e-06,
"loss": 0.0367,
"step": 11250
},
{
"epoch": 27.15,
"learning_rate": 1e-06,
"loss": 0.0367,
"step": 11275
},
{
"epoch": 27.21,
"learning_rate": 1e-06,
"loss": 0.0376,
"step": 11300
},
{
"epoch": 27.27,
"learning_rate": 1e-06,
"loss": 0.0412,
"step": 11325
},
{
"epoch": 27.33,
"learning_rate": 1e-06,
"loss": 0.0323,
"step": 11350
},
{
"epoch": 27.4,
"learning_rate": 1e-06,
"loss": 0.0335,
"step": 11375
},
{
"epoch": 27.46,
"learning_rate": 1e-06,
"loss": 0.0365,
"step": 11400
},
{
"epoch": 27.52,
"learning_rate": 1e-06,
"loss": 0.0374,
"step": 11425
},
{
"epoch": 27.58,
"learning_rate": 1e-06,
"loss": 0.0383,
"step": 11450
},
{
"epoch": 27.64,
"learning_rate": 1e-06,
"loss": 0.0326,
"step": 11475
},
{
"epoch": 27.71,
"learning_rate": 1e-06,
"loss": 0.0405,
"step": 11500
},
{
"epoch": 27.77,
"learning_rate": 1e-06,
"loss": 0.0347,
"step": 11525
},
{
"epoch": 27.83,
"learning_rate": 1e-06,
"loss": 0.0383,
"step": 11550
},
{
"epoch": 27.89,
"learning_rate": 1e-06,
"loss": 0.0361,
"step": 11575
},
{
"epoch": 27.96,
"learning_rate": 1e-06,
"loss": 0.0341,
"step": 11600
},
{
"epoch": 28.02,
"learning_rate": 1e-06,
"loss": 0.0376,
"step": 11625
},
{
"epoch": 28.08,
"learning_rate": 1e-06,
"loss": 0.0332,
"step": 11650
},
{
"epoch": 28.14,
"learning_rate": 1e-06,
"loss": 0.0429,
"step": 11675
},
{
"epoch": 28.2,
"learning_rate": 1e-06,
"loss": 0.0373,
"step": 11700
},
{
"epoch": 28.27,
"learning_rate": 1e-06,
"loss": 0.0354,
"step": 11725
},
{
"epoch": 28.33,
"learning_rate": 1e-06,
"loss": 0.0355,
"step": 11750
},
{
"epoch": 28.39,
"learning_rate": 1e-06,
"loss": 0.0368,
"step": 11775
},
{
"epoch": 28.45,
"learning_rate": 1e-06,
"loss": 0.0338,
"step": 11800
},
{
"epoch": 28.51,
"learning_rate": 1e-06,
"loss": 0.0363,
"step": 11825
},
{
"epoch": 28.58,
"learning_rate": 1e-06,
"loss": 0.0391,
"step": 11850
},
{
"epoch": 28.64,
"learning_rate": 1e-06,
"loss": 0.0309,
"step": 11875
},
{
"epoch": 28.7,
"learning_rate": 1e-06,
"loss": 0.0353,
"step": 11900
},
{
"epoch": 28.76,
"learning_rate": 1e-06,
"loss": 0.0398,
"step": 11925
},
{
"epoch": 28.83,
"learning_rate": 1e-06,
"loss": 0.0394,
"step": 11950
},
{
"epoch": 28.89,
"learning_rate": 1e-06,
"loss": 0.0351,
"step": 11975
},
{
"epoch": 28.95,
"learning_rate": 1e-06,
"loss": 0.0308,
"step": 12000
},
{
"epoch": 28.95,
"eval_loss": 0.485107421875,
"eval_runtime": 167.5369,
"eval_samples_per_second": 10.123,
"eval_steps_per_second": 0.633,
"eval_wer": 17.86775631500743,
"step": 12000
},
{
"epoch": 29.01,
"learning_rate": 1e-06,
"loss": 0.0341,
"step": 12025
},
{
"epoch": 29.07,
"learning_rate": 1e-06,
"loss": 0.0358,
"step": 12050
},
{
"epoch": 29.14,
"learning_rate": 1e-06,
"loss": 0.0349,
"step": 12075
},
{
"epoch": 29.2,
"learning_rate": 1e-06,
"loss": 0.0382,
"step": 12100
},
{
"epoch": 29.26,
"learning_rate": 1e-06,
"loss": 0.0312,
"step": 12125
},
{
"epoch": 29.32,
"learning_rate": 1e-06,
"loss": 0.0344,
"step": 12150
},
{
"epoch": 29.39,
"learning_rate": 1e-06,
"loss": 0.0298,
"step": 12175
},
{
"epoch": 29.45,
"learning_rate": 1e-06,
"loss": 0.0313,
"step": 12200
},
{
"epoch": 29.51,
"learning_rate": 1e-06,
"loss": 0.0277,
"step": 12225
},
{
"epoch": 29.57,
"learning_rate": 1e-06,
"loss": 0.0326,
"step": 12250
},
{
"epoch": 29.63,
"learning_rate": 1e-06,
"loss": 0.0328,
"step": 12275
},
{
"epoch": 29.7,
"learning_rate": 1e-06,
"loss": 0.0333,
"step": 12300
},
{
"epoch": 29.76,
"learning_rate": 1e-06,
"loss": 0.033,
"step": 12325
},
{
"epoch": 29.82,
"learning_rate": 1e-06,
"loss": 0.0415,
"step": 12350
},
{
"epoch": 29.88,
"learning_rate": 1e-06,
"loss": 0.0346,
"step": 12375
},
{
"epoch": 29.95,
"learning_rate": 1e-06,
"loss": 0.0389,
"step": 12400
},
{
"epoch": 30.01,
"learning_rate": 1e-06,
"loss": 0.0355,
"step": 12425
},
{
"epoch": 30.07,
"learning_rate": 1e-06,
"loss": 0.0406,
"step": 12450
},
{
"epoch": 30.13,
"learning_rate": 1e-06,
"loss": 0.031,
"step": 12475
},
{
"epoch": 30.19,
"learning_rate": 1e-06,
"loss": 0.0311,
"step": 12500
},
{
"epoch": 30.26,
"learning_rate": 1e-06,
"loss": 0.0357,
"step": 12525
},
{
"epoch": 30.32,
"learning_rate": 1e-06,
"loss": 0.0351,
"step": 12550
},
{
"epoch": 30.38,
"learning_rate": 1e-06,
"loss": 0.0355,
"step": 12575
},
{
"epoch": 30.44,
"learning_rate": 1e-06,
"loss": 0.0347,
"step": 12600
},
{
"epoch": 30.5,
"learning_rate": 1e-06,
"loss": 0.0293,
"step": 12625
},
{
"epoch": 30.57,
"learning_rate": 1e-06,
"loss": 0.0346,
"step": 12650
},
{
"epoch": 30.63,
"learning_rate": 1e-06,
"loss": 0.0324,
"step": 12675
},
{
"epoch": 30.69,
"learning_rate": 1e-06,
"loss": 0.0374,
"step": 12700
},
{
"epoch": 30.75,
"learning_rate": 1e-06,
"loss": 0.0296,
"step": 12725
},
{
"epoch": 30.82,
"learning_rate": 1e-06,
"loss": 0.0308,
"step": 12750
},
{
"epoch": 30.88,
"learning_rate": 1e-06,
"loss": 0.0375,
"step": 12775
},
{
"epoch": 30.94,
"learning_rate": 1e-06,
"loss": 0.0284,
"step": 12800
},
{
"epoch": 31.0,
"learning_rate": 1e-06,
"loss": 0.0293,
"step": 12825
},
{
"epoch": 31.06,
"learning_rate": 1e-06,
"loss": 0.0354,
"step": 12850
},
{
"epoch": 31.13,
"learning_rate": 1e-06,
"loss": 0.0267,
"step": 12875
},
{
"epoch": 31.19,
"learning_rate": 1e-06,
"loss": 0.0342,
"step": 12900
},
{
"epoch": 31.25,
"learning_rate": 1e-06,
"loss": 0.0319,
"step": 12925
},
{
"epoch": 31.31,
"learning_rate": 1e-06,
"loss": 0.0291,
"step": 12950
},
{
"epoch": 31.38,
"learning_rate": 1e-06,
"loss": 0.0339,
"step": 12975
},
{
"epoch": 31.44,
"learning_rate": 1e-06,
"loss": 0.0301,
"step": 13000
},
{
"epoch": 31.44,
"eval_loss": 0.484619140625,
"eval_runtime": 166.8919,
"eval_samples_per_second": 10.162,
"eval_steps_per_second": 0.635,
"eval_wer": 17.449851411589894,
"step": 13000
},
{
"epoch": 31.5,
"learning_rate": 1e-06,
"loss": 0.0311,
"step": 13025
},
{
"epoch": 31.56,
"learning_rate": 1e-06,
"loss": 0.0298,
"step": 13050
},
{
"epoch": 31.62,
"learning_rate": 1e-06,
"loss": 0.034,
"step": 13075
},
{
"epoch": 31.69,
"learning_rate": 1e-06,
"loss": 0.0323,
"step": 13100
},
{
"epoch": 31.75,
"learning_rate": 1e-06,
"loss": 0.0376,
"step": 13125
},
{
"epoch": 31.81,
"learning_rate": 1e-06,
"loss": 0.0315,
"step": 13150
},
{
"epoch": 31.87,
"learning_rate": 1e-06,
"loss": 0.0386,
"step": 13175
},
{
"epoch": 31.94,
"learning_rate": 1e-06,
"loss": 0.0294,
"step": 13200
},
{
"epoch": 32.0,
"learning_rate": 1e-06,
"loss": 0.0317,
"step": 13225
},
{
"epoch": 32.06,
"learning_rate": 1e-06,
"loss": 0.0302,
"step": 13250
},
{
"epoch": 32.12,
"learning_rate": 1e-06,
"loss": 0.0322,
"step": 13275
},
{
"epoch": 32.18,
"learning_rate": 1e-06,
"loss": 0.0265,
"step": 13300
},
{
"epoch": 32.25,
"learning_rate": 1e-06,
"loss": 0.0306,
"step": 13325
},
{
"epoch": 32.31,
"learning_rate": 1e-06,
"loss": 0.0276,
"step": 13350
},
{
"epoch": 32.37,
"learning_rate": 1e-06,
"loss": 0.0303,
"step": 13375
},
{
"epoch": 32.43,
"learning_rate": 1e-06,
"loss": 0.0306,
"step": 13400
},
{
"epoch": 32.5,
"learning_rate": 1e-06,
"loss": 0.0254,
"step": 13425
},
{
"epoch": 32.56,
"learning_rate": 1e-06,
"loss": 0.0274,
"step": 13450
},
{
"epoch": 32.62,
"learning_rate": 1e-06,
"loss": 0.0327,
"step": 13475
},
{
"epoch": 32.68,
"learning_rate": 1e-06,
"loss": 0.0297,
"step": 13500
},
{
"epoch": 32.74,
"learning_rate": 1e-06,
"loss": 0.0367,
"step": 13525
},
{
"epoch": 32.81,
"learning_rate": 1e-06,
"loss": 0.0315,
"step": 13550
},
{
"epoch": 32.87,
"learning_rate": 1e-06,
"loss": 0.0333,
"step": 13575
},
{
"epoch": 32.93,
"learning_rate": 1e-06,
"loss": 0.0388,
"step": 13600
},
{
"epoch": 32.99,
"learning_rate": 1e-06,
"loss": 0.0286,
"step": 13625
},
{
"epoch": 33.05,
"learning_rate": 1e-06,
"loss": 0.0299,
"step": 13650
},
{
"epoch": 33.12,
"learning_rate": 1e-06,
"loss": 0.0291,
"step": 13675
},
{
"epoch": 33.18,
"learning_rate": 1e-06,
"loss": 0.0259,
"step": 13700
},
{
"epoch": 33.24,
"learning_rate": 1e-06,
"loss": 0.0296,
"step": 13725
},
{
"epoch": 33.3,
"learning_rate": 1e-06,
"loss": 0.0267,
"step": 13750
},
{
"epoch": 33.37,
"learning_rate": 1e-06,
"loss": 0.0317,
"step": 13775
},
{
"epoch": 33.43,
"learning_rate": 1e-06,
"loss": 0.0296,
"step": 13800
},
{
"epoch": 33.49,
"learning_rate": 1e-06,
"loss": 0.0297,
"step": 13825
},
{
"epoch": 33.55,
"learning_rate": 1e-06,
"loss": 0.0314,
"step": 13850
},
{
"epoch": 33.61,
"learning_rate": 1e-06,
"loss": 0.0286,
"step": 13875
},
{
"epoch": 33.68,
"learning_rate": 1e-06,
"loss": 0.0306,
"step": 13900
},
{
"epoch": 33.74,
"learning_rate": 1e-06,
"loss": 0.0309,
"step": 13925
},
{
"epoch": 33.8,
"learning_rate": 1e-06,
"loss": 0.0294,
"step": 13950
},
{
"epoch": 33.86,
"learning_rate": 1e-06,
"loss": 0.0346,
"step": 13975
},
{
"epoch": 33.93,
"learning_rate": 1e-06,
"loss": 0.031,
"step": 14000
},
{
"epoch": 33.93,
"eval_loss": 0.48486328125,
"eval_runtime": 166.8129,
"eval_samples_per_second": 10.167,
"eval_steps_per_second": 0.635,
"eval_wer": 17.830609212481427,
"step": 14000
},
{
"epoch": 33.99,
"learning_rate": 1e-06,
"loss": 0.0261,
"step": 14025
},
{
"epoch": 34.05,
"learning_rate": 1e-06,
"loss": 0.0344,
"step": 14050
},
{
"epoch": 34.11,
"learning_rate": 1e-06,
"loss": 0.0249,
"step": 14075
},
{
"epoch": 34.17,
"learning_rate": 1e-06,
"loss": 0.0258,
"step": 14100
},
{
"epoch": 34.24,
"learning_rate": 1e-06,
"loss": 0.0289,
"step": 14125
},
{
"epoch": 34.3,
"learning_rate": 1e-06,
"loss": 0.0306,
"step": 14150
},
{
"epoch": 34.36,
"learning_rate": 1e-06,
"loss": 0.0313,
"step": 14175
},
{
"epoch": 34.42,
"learning_rate": 1e-06,
"loss": 0.0286,
"step": 14200
},
{
"epoch": 34.49,
"learning_rate": 1e-06,
"loss": 0.0286,
"step": 14225
},
{
"epoch": 34.55,
"learning_rate": 1e-06,
"loss": 0.0274,
"step": 14250
},
{
"epoch": 34.61,
"learning_rate": 1e-06,
"loss": 0.0225,
"step": 14275
},
{
"epoch": 34.67,
"learning_rate": 1e-06,
"loss": 0.028,
"step": 14300
},
{
"epoch": 34.73,
"learning_rate": 1e-06,
"loss": 0.0288,
"step": 14325
},
{
"epoch": 34.8,
"learning_rate": 1e-06,
"loss": 0.0292,
"step": 14350
},
{
"epoch": 34.86,
"learning_rate": 1e-06,
"loss": 0.0283,
"step": 14375
},
{
"epoch": 34.92,
"learning_rate": 1e-06,
"loss": 0.0319,
"step": 14400
},
{
"epoch": 34.98,
"learning_rate": 1e-06,
"loss": 0.0254,
"step": 14425
},
{
"epoch": 35.04,
"learning_rate": 1e-06,
"loss": 0.0286,
"step": 14450
},
{
"epoch": 35.11,
"learning_rate": 1e-06,
"loss": 0.0257,
"step": 14475
},
{
"epoch": 35.17,
"learning_rate": 1e-06,
"loss": 0.0284,
"step": 14500
},
{
"epoch": 35.23,
"learning_rate": 1e-06,
"loss": 0.0306,
"step": 14525
},
{
"epoch": 35.29,
"learning_rate": 1e-06,
"loss": 0.0272,
"step": 14550
},
{
"epoch": 35.36,
"learning_rate": 1e-06,
"loss": 0.0313,
"step": 14575
},
{
"epoch": 35.42,
"learning_rate": 1e-06,
"loss": 0.0277,
"step": 14600
},
{
"epoch": 35.48,
"learning_rate": 1e-06,
"loss": 0.0251,
"step": 14625
},
{
"epoch": 35.54,
"learning_rate": 1e-06,
"loss": 0.0217,
"step": 14650
},
{
"epoch": 35.6,
"learning_rate": 1e-06,
"loss": 0.0296,
"step": 14675
},
{
"epoch": 35.67,
"learning_rate": 1e-06,
"loss": 0.0245,
"step": 14700
},
{
"epoch": 35.73,
"learning_rate": 1e-06,
"loss": 0.0257,
"step": 14725
},
{
"epoch": 35.79,
"learning_rate": 1e-06,
"loss": 0.0289,
"step": 14750
},
{
"epoch": 35.85,
"learning_rate": 1e-06,
"loss": 0.0265,
"step": 14775
},
{
"epoch": 35.92,
"learning_rate": 1e-06,
"loss": 0.0279,
"step": 14800
},
{
"epoch": 35.98,
"learning_rate": 1e-06,
"loss": 0.0292,
"step": 14825
},
{
"epoch": 36.04,
"learning_rate": 1e-06,
"loss": 0.0294,
"step": 14850
},
{
"epoch": 36.1,
"learning_rate": 1e-06,
"loss": 0.0256,
"step": 14875
},
{
"epoch": 36.16,
"learning_rate": 1e-06,
"loss": 0.0281,
"step": 14900
},
{
"epoch": 36.23,
"learning_rate": 1e-06,
"loss": 0.0278,
"step": 14925
},
{
"epoch": 36.29,
"learning_rate": 1e-06,
"loss": 0.0267,
"step": 14950
},
{
"epoch": 36.35,
"learning_rate": 1e-06,
"loss": 0.0208,
"step": 14975
},
{
"epoch": 36.41,
"learning_rate": 1e-06,
"loss": 0.0263,
"step": 15000
},
{
"epoch": 36.41,
"eval_loss": 0.488037109375,
"eval_runtime": 166.7242,
"eval_samples_per_second": 10.172,
"eval_steps_per_second": 0.636,
"eval_wer": 17.61701337295691,
"step": 15000
},
{
"epoch": 36.48,
"learning_rate": 1e-06,
"loss": 0.0311,
"step": 15025
},
{
"epoch": 36.54,
"learning_rate": 1e-06,
"loss": 0.0255,
"step": 15050
},
{
"epoch": 36.6,
"learning_rate": 1e-06,
"loss": 0.0267,
"step": 15075
},
{
"epoch": 36.66,
"learning_rate": 1e-06,
"loss": 0.0277,
"step": 15100
},
{
"epoch": 36.72,
"learning_rate": 1e-06,
"loss": 0.0241,
"step": 15125
},
{
"epoch": 36.79,
"learning_rate": 1e-06,
"loss": 0.0267,
"step": 15150
},
{
"epoch": 36.85,
"learning_rate": 1e-06,
"loss": 0.0238,
"step": 15175
},
{
"epoch": 36.91,
"learning_rate": 1e-06,
"loss": 0.0289,
"step": 15200
},
{
"epoch": 36.97,
"learning_rate": 1e-06,
"loss": 0.026,
"step": 15225
},
{
"epoch": 37.03,
"learning_rate": 1e-06,
"loss": 0.0249,
"step": 15250
},
{
"epoch": 37.1,
"learning_rate": 1e-06,
"loss": 0.0259,
"step": 15275
},
{
"epoch": 37.16,
"learning_rate": 1e-06,
"loss": 0.0266,
"step": 15300
},
{
"epoch": 37.22,
"learning_rate": 1e-06,
"loss": 0.029,
"step": 15325
},
{
"epoch": 37.28,
"learning_rate": 1e-06,
"loss": 0.0277,
"step": 15350
},
{
"epoch": 37.35,
"learning_rate": 1e-06,
"loss": 0.0253,
"step": 15375
},
{
"epoch": 37.41,
"learning_rate": 1e-06,
"loss": 0.0276,
"step": 15400
},
{
"epoch": 37.47,
"learning_rate": 1e-06,
"loss": 0.0236,
"step": 15425
},
{
"epoch": 37.53,
"learning_rate": 1e-06,
"loss": 0.0245,
"step": 15450
},
{
"epoch": 37.59,
"learning_rate": 1e-06,
"loss": 0.0248,
"step": 15475
},
{
"epoch": 37.66,
"learning_rate": 1e-06,
"loss": 0.0248,
"step": 15500
},
{
"epoch": 37.72,
"learning_rate": 1e-06,
"loss": 0.022,
"step": 15525
},
{
"epoch": 37.78,
"learning_rate": 1e-06,
"loss": 0.0261,
"step": 15550
},
{
"epoch": 37.84,
"learning_rate": 1e-06,
"loss": 0.0265,
"step": 15575
},
{
"epoch": 37.91,
"learning_rate": 1e-06,
"loss": 0.0241,
"step": 15600
},
{
"epoch": 37.97,
"learning_rate": 1e-06,
"loss": 0.0261,
"step": 15625
},
{
"epoch": 38.03,
"learning_rate": 1e-06,
"loss": 0.0239,
"step": 15650
},
{
"epoch": 38.09,
"learning_rate": 1e-06,
"loss": 0.0255,
"step": 15675
},
{
"epoch": 38.15,
"learning_rate": 1e-06,
"loss": 0.0251,
"step": 15700
},
{
"epoch": 38.22,
"learning_rate": 1e-06,
"loss": 0.0255,
"step": 15725
},
{
"epoch": 38.28,
"learning_rate": 1e-06,
"loss": 0.0255,
"step": 15750
},
{
"epoch": 38.34,
"learning_rate": 1e-06,
"loss": 0.0252,
"step": 15775
},
{
"epoch": 38.4,
"learning_rate": 1e-06,
"loss": 0.026,
"step": 15800
},
{
"epoch": 38.47,
"learning_rate": 1e-06,
"loss": 0.0261,
"step": 15825
},
{
"epoch": 38.53,
"learning_rate": 1e-06,
"loss": 0.0243,
"step": 15850
},
{
"epoch": 38.59,
"learning_rate": 1e-06,
"loss": 0.0253,
"step": 15875
},
{
"epoch": 38.65,
"learning_rate": 1e-06,
"loss": 0.0276,
"step": 15900
},
{
"epoch": 38.71,
"learning_rate": 1e-06,
"loss": 0.0239,
"step": 15925
},
{
"epoch": 38.78,
"learning_rate": 1e-06,
"loss": 0.0259,
"step": 15950
},
{
"epoch": 38.84,
"learning_rate": 1e-06,
"loss": 0.0232,
"step": 15975
},
{
"epoch": 38.9,
"learning_rate": 1e-06,
"loss": 0.0256,
"step": 16000
},
{
"epoch": 38.9,
"eval_loss": 0.487060546875,
"eval_runtime": 166.9973,
"eval_samples_per_second": 10.156,
"eval_steps_per_second": 0.635,
"eval_wer": 17.199108469539375,
"step": 16000
},
{
"epoch": 38.96,
"learning_rate": 1e-06,
"loss": 0.0228,
"step": 16025
},
{
"epoch": 39.02,
"learning_rate": 1e-06,
"loss": 0.0208,
"step": 16050
},
{
"epoch": 39.09,
"learning_rate": 1e-06,
"loss": 0.0237,
"step": 16075
},
{
"epoch": 39.15,
"learning_rate": 1e-06,
"loss": 0.0238,
"step": 16100
},
{
"epoch": 39.21,
"learning_rate": 1e-06,
"loss": 0.025,
"step": 16125
},
{
"epoch": 39.27,
"learning_rate": 1e-06,
"loss": 0.0219,
"step": 16150
},
{
"epoch": 39.34,
"learning_rate": 1e-06,
"loss": 0.0214,
"step": 16175
},
{
"epoch": 39.4,
"learning_rate": 1e-06,
"loss": 0.0235,
"step": 16200
},
{
"epoch": 39.46,
"learning_rate": 1e-06,
"loss": 0.0219,
"step": 16225
},
{
"epoch": 39.52,
"learning_rate": 1e-06,
"loss": 0.0249,
"step": 16250
},
{
"epoch": 39.58,
"learning_rate": 1e-06,
"loss": 0.0265,
"step": 16275
},
{
"epoch": 39.65,
"learning_rate": 1e-06,
"loss": 0.0252,
"step": 16300
},
{
"epoch": 39.71,
"learning_rate": 1e-06,
"loss": 0.0224,
"step": 16325
},
{
"epoch": 39.77,
"learning_rate": 1e-06,
"loss": 0.0289,
"step": 16350
},
{
"epoch": 39.83,
"learning_rate": 1e-06,
"loss": 0.0218,
"step": 16375
},
{
"epoch": 39.9,
"learning_rate": 1e-06,
"loss": 0.0256,
"step": 16400
},
{
"epoch": 39.96,
"learning_rate": 1e-06,
"loss": 0.0247,
"step": 16425
},
{
"epoch": 40.02,
"learning_rate": 1e-06,
"loss": 0.0233,
"step": 16450
},
{
"epoch": 40.08,
"learning_rate": 1e-06,
"loss": 0.0241,
"step": 16475
},
{
"epoch": 40.14,
"learning_rate": 1e-06,
"loss": 0.0242,
"step": 16500
},
{
"epoch": 40.21,
"learning_rate": 1e-06,
"loss": 0.0225,
"step": 16525
},
{
"epoch": 40.27,
"learning_rate": 1e-06,
"loss": 0.0234,
"step": 16550
},
{
"epoch": 40.33,
"learning_rate": 1e-06,
"loss": 0.0206,
"step": 16575
},
{
"epoch": 40.39,
"learning_rate": 1e-06,
"loss": 0.0234,
"step": 16600
},
{
"epoch": 40.46,
"learning_rate": 1e-06,
"loss": 0.0249,
"step": 16625
},
{
"epoch": 40.52,
"learning_rate": 1e-06,
"loss": 0.0243,
"step": 16650
},
{
"epoch": 40.58,
"learning_rate": 1e-06,
"loss": 0.0218,
"step": 16675
},
{
"epoch": 40.64,
"learning_rate": 1e-06,
"loss": 0.0203,
"step": 16700
},
{
"epoch": 40.7,
"learning_rate": 1e-06,
"loss": 0.024,
"step": 16725
},
{
"epoch": 40.77,
"learning_rate": 1e-06,
"loss": 0.0232,
"step": 16750
},
{
"epoch": 40.83,
"learning_rate": 1e-06,
"loss": 0.0197,
"step": 16775
},
{
"epoch": 40.89,
"learning_rate": 1e-06,
"loss": 0.0257,
"step": 16800
},
{
"epoch": 40.95,
"learning_rate": 1e-06,
"loss": 0.0259,
"step": 16825
},
{
"epoch": 41.01,
"learning_rate": 1e-06,
"loss": 0.0215,
"step": 16850
},
{
"epoch": 41.08,
"learning_rate": 1e-06,
"loss": 0.0228,
"step": 16875
},
{
"epoch": 41.14,
"learning_rate": 1e-06,
"loss": 0.0232,
"step": 16900
},
{
"epoch": 41.2,
"learning_rate": 1e-06,
"loss": 0.025,
"step": 16925
},
{
"epoch": 41.26,
"learning_rate": 1e-06,
"loss": 0.0231,
"step": 16950
},
{
"epoch": 41.33,
"learning_rate": 1e-06,
"loss": 0.0211,
"step": 16975
},
{
"epoch": 41.39,
"learning_rate": 1e-06,
"loss": 0.0236,
"step": 17000
},
{
"epoch": 41.39,
"eval_loss": 0.48828125,
"eval_runtime": 167.0132,
"eval_samples_per_second": 10.155,
"eval_steps_per_second": 0.635,
"eval_wer": 17.26411589895988,
"step": 17000
},
{
"epoch": 41.45,
"learning_rate": 1e-06,
"loss": 0.0257,
"step": 17025
},
{
"epoch": 41.51,
"learning_rate": 1e-06,
"loss": 0.0236,
"step": 17050
},
{
"epoch": 41.57,
"learning_rate": 1e-06,
"loss": 0.0221,
"step": 17075
},
{
"epoch": 41.64,
"learning_rate": 1e-06,
"loss": 0.0182,
"step": 17100
},
{
"epoch": 41.7,
"learning_rate": 1e-06,
"loss": 0.0247,
"step": 17125
},
{
"epoch": 41.76,
"learning_rate": 1e-06,
"loss": 0.0204,
"step": 17150
},
{
"epoch": 41.82,
"learning_rate": 1e-06,
"loss": 0.0221,
"step": 17175
},
{
"epoch": 41.89,
"learning_rate": 1e-06,
"loss": 0.0227,
"step": 17200
},
{
"epoch": 41.95,
"learning_rate": 1e-06,
"loss": 0.0213,
"step": 17225
},
{
"epoch": 42.01,
"learning_rate": 1e-06,
"loss": 0.021,
"step": 17250
},
{
"epoch": 42.07,
"learning_rate": 1e-06,
"loss": 0.0219,
"step": 17275
},
{
"epoch": 42.13,
"learning_rate": 1e-06,
"loss": 0.0179,
"step": 17300
},
{
"epoch": 42.2,
"learning_rate": 1e-06,
"loss": 0.0207,
"step": 17325
},
{
"epoch": 42.26,
"learning_rate": 1e-06,
"loss": 0.0198,
"step": 17350
},
{
"epoch": 42.32,
"learning_rate": 1e-06,
"loss": 0.0192,
"step": 17375
},
{
"epoch": 42.38,
"learning_rate": 1e-06,
"loss": 0.0231,
"step": 17400
},
{
"epoch": 42.45,
"learning_rate": 1e-06,
"loss": 0.0216,
"step": 17425
},
{
"epoch": 42.51,
"learning_rate": 1e-06,
"loss": 0.0203,
"step": 17450
},
{
"epoch": 42.57,
"learning_rate": 1e-06,
"loss": 0.022,
"step": 17475
},
{
"epoch": 42.63,
"learning_rate": 1e-06,
"loss": 0.0198,
"step": 17500
},
{
"epoch": 42.69,
"learning_rate": 1e-06,
"loss": 0.0184,
"step": 17525
},
{
"epoch": 42.76,
"learning_rate": 1e-06,
"loss": 0.0223,
"step": 17550
},
{
"epoch": 42.82,
"learning_rate": 1e-06,
"loss": 0.0221,
"step": 17575
},
{
"epoch": 42.88,
"learning_rate": 1e-06,
"loss": 0.0254,
"step": 17600
},
{
"epoch": 42.94,
"learning_rate": 1e-06,
"loss": 0.024,
"step": 17625
},
{
"epoch": 43.0,
"learning_rate": 1e-06,
"loss": 0.02,
"step": 17650
},
{
"epoch": 43.07,
"learning_rate": 1e-06,
"loss": 0.022,
"step": 17675
},
{
"epoch": 43.13,
"learning_rate": 1e-06,
"loss": 0.0198,
"step": 17700
},
{
"epoch": 43.19,
"learning_rate": 1e-06,
"loss": 0.019,
"step": 17725
},
{
"epoch": 43.25,
"learning_rate": 1e-06,
"loss": 0.0214,
"step": 17750
},
{
"epoch": 43.32,
"learning_rate": 1e-06,
"loss": 0.0219,
"step": 17775
},
{
"epoch": 43.38,
"learning_rate": 1e-06,
"loss": 0.0176,
"step": 17800
},
{
"epoch": 43.44,
"learning_rate": 1e-06,
"loss": 0.0224,
"step": 17825
},
{
"epoch": 43.5,
"learning_rate": 1e-06,
"loss": 0.023,
"step": 17850
},
{
"epoch": 43.56,
"learning_rate": 1e-06,
"loss": 0.0185,
"step": 17875
},
{
"epoch": 43.63,
"learning_rate": 1e-06,
"loss": 0.0225,
"step": 17900
},
{
"epoch": 43.69,
"learning_rate": 1e-06,
"loss": 0.0201,
"step": 17925
},
{
"epoch": 43.75,
"learning_rate": 1e-06,
"loss": 0.022,
"step": 17950
},
{
"epoch": 43.81,
"learning_rate": 1e-06,
"loss": 0.0219,
"step": 17975
},
{
"epoch": 43.88,
"learning_rate": 1e-06,
"loss": 0.0195,
"step": 18000
},
{
"epoch": 43.88,
"eval_loss": 0.488037109375,
"eval_runtime": 167.5739,
"eval_samples_per_second": 10.121,
"eval_steps_per_second": 0.633,
"eval_wer": 17.570579494799404,
"step": 18000
},
{
"epoch": 43.94,
"learning_rate": 1e-06,
"loss": 0.0189,
"step": 18025
},
{
"epoch": 44.0,
"learning_rate": 1e-06,
"loss": 0.0231,
"step": 18050
},
{
"epoch": 44.06,
"learning_rate": 1e-06,
"loss": 0.0194,
"step": 18075
},
{
"epoch": 44.12,
"learning_rate": 1e-06,
"loss": 0.022,
"step": 18100
},
{
"epoch": 44.19,
"learning_rate": 1e-06,
"loss": 0.0199,
"step": 18125
},
{
"epoch": 44.25,
"learning_rate": 1e-06,
"loss": 0.0227,
"step": 18150
},
{
"epoch": 44.31,
"learning_rate": 1e-06,
"loss": 0.0228,
"step": 18175
},
{
"epoch": 44.37,
"learning_rate": 1e-06,
"loss": 0.0181,
"step": 18200
},
{
"epoch": 44.44,
"learning_rate": 1e-06,
"loss": 0.0171,
"step": 18225
},
{
"epoch": 44.5,
"learning_rate": 1e-06,
"loss": 0.0207,
"step": 18250
},
{
"epoch": 44.56,
"learning_rate": 1e-06,
"loss": 0.022,
"step": 18275
},
{
"epoch": 44.62,
"learning_rate": 1e-06,
"loss": 0.0177,
"step": 18300
},
{
"epoch": 44.68,
"learning_rate": 1e-06,
"loss": 0.0191,
"step": 18325
},
{
"epoch": 44.75,
"learning_rate": 1e-06,
"loss": 0.022,
"step": 18350
},
{
"epoch": 44.81,
"learning_rate": 1e-06,
"loss": 0.0222,
"step": 18375
},
{
"epoch": 44.87,
"learning_rate": 1e-06,
"loss": 0.0194,
"step": 18400
},
{
"epoch": 44.93,
"learning_rate": 1e-06,
"loss": 0.0207,
"step": 18425
},
{
"epoch": 45.0,
"learning_rate": 1e-06,
"loss": 0.019,
"step": 18450
},
{
"epoch": 45.06,
"learning_rate": 1e-06,
"loss": 0.0181,
"step": 18475
},
{
"epoch": 45.12,
"learning_rate": 1e-06,
"loss": 0.0222,
"step": 18500
},
{
"epoch": 45.18,
"learning_rate": 1e-06,
"loss": 0.0181,
"step": 18525
},
{
"epoch": 45.24,
"learning_rate": 1e-06,
"loss": 0.018,
"step": 18550
},
{
"epoch": 45.31,
"learning_rate": 1e-06,
"loss": 0.02,
"step": 18575
},
{
"epoch": 45.37,
"learning_rate": 1e-06,
"loss": 0.0177,
"step": 18600
},
{
"epoch": 45.43,
"learning_rate": 1e-06,
"loss": 0.0185,
"step": 18625
},
{
"epoch": 45.49,
"learning_rate": 1e-06,
"loss": 0.0211,
"step": 18650
},
{
"epoch": 45.55,
"learning_rate": 1e-06,
"loss": 0.0188,
"step": 18675
},
{
"epoch": 45.62,
"learning_rate": 1e-06,
"loss": 0.0219,
"step": 18700
},
{
"epoch": 45.68,
"learning_rate": 1e-06,
"loss": 0.0207,
"step": 18725
},
{
"epoch": 45.74,
"learning_rate": 1e-06,
"loss": 0.0167,
"step": 18750
},
{
"epoch": 45.8,
"learning_rate": 1e-06,
"loss": 0.019,
"step": 18775
},
{
"epoch": 45.87,
"learning_rate": 1e-06,
"loss": 0.021,
"step": 18800
},
{
"epoch": 45.93,
"learning_rate": 1e-06,
"loss": 0.0203,
"step": 18825
},
{
"epoch": 45.99,
"learning_rate": 1e-06,
"loss": 0.0195,
"step": 18850
},
{
"epoch": 46.05,
"learning_rate": 1e-06,
"loss": 0.0203,
"step": 18875
},
{
"epoch": 46.11,
"learning_rate": 1e-06,
"loss": 0.0189,
"step": 18900
},
{
"epoch": 46.18,
"learning_rate": 1e-06,
"loss": 0.0182,
"step": 18925
},
{
"epoch": 46.24,
"learning_rate": 1e-06,
"loss": 0.0194,
"step": 18950
},
{
"epoch": 46.3,
"learning_rate": 1e-06,
"loss": 0.0176,
"step": 18975
},
{
"epoch": 46.36,
"learning_rate": 1e-06,
"loss": 0.0193,
"step": 19000
},
{
"epoch": 46.36,
"eval_loss": 0.499267578125,
"eval_runtime": 167.0506,
"eval_samples_per_second": 10.153,
"eval_steps_per_second": 0.635,
"eval_wer": 17.72845468053492,
"step": 19000
},
{
"epoch": 46.43,
"learning_rate": 1e-06,
"loss": 0.0184,
"step": 19025
},
{
"epoch": 46.49,
"learning_rate": 1e-06,
"loss": 0.0183,
"step": 19050
},
{
"epoch": 46.55,
"learning_rate": 1e-06,
"loss": 0.0178,
"step": 19075
},
{
"epoch": 46.61,
"learning_rate": 1e-06,
"loss": 0.0173,
"step": 19100
},
{
"epoch": 46.67,
"learning_rate": 1e-06,
"loss": 0.0175,
"step": 19125
},
{
"epoch": 46.74,
"learning_rate": 1e-06,
"loss": 0.019,
"step": 19150
},
{
"epoch": 46.8,
"learning_rate": 1e-06,
"loss": 0.0195,
"step": 19175
},
{
"epoch": 46.86,
"learning_rate": 1e-06,
"loss": 0.0163,
"step": 19200
},
{
"epoch": 46.92,
"learning_rate": 1e-06,
"loss": 0.0212,
"step": 19225
},
{
"epoch": 46.99,
"learning_rate": 1e-06,
"loss": 0.0185,
"step": 19250
},
{
"epoch": 47.05,
"learning_rate": 1e-06,
"loss": 0.0168,
"step": 19275
},
{
"epoch": 47.11,
"learning_rate": 1e-06,
"loss": 0.0168,
"step": 19300
},
{
"epoch": 47.17,
"learning_rate": 1e-06,
"loss": 0.0198,
"step": 19325
},
{
"epoch": 47.23,
"learning_rate": 1e-06,
"loss": 0.0156,
"step": 19350
},
{
"epoch": 47.3,
"learning_rate": 1e-06,
"loss": 0.0157,
"step": 19375
},
{
"epoch": 47.36,
"learning_rate": 1e-06,
"loss": 0.0176,
"step": 19400
},
{
"epoch": 47.42,
"learning_rate": 1e-06,
"loss": 0.019,
"step": 19425
},
{
"epoch": 47.48,
"learning_rate": 1e-06,
"loss": 0.0198,
"step": 19450
},
{
"epoch": 47.54,
"learning_rate": 1e-06,
"loss": 0.0177,
"step": 19475
},
{
"epoch": 47.61,
"learning_rate": 1e-06,
"loss": 0.0178,
"step": 19500
},
{
"epoch": 47.67,
"learning_rate": 1e-06,
"loss": 0.0171,
"step": 19525
},
{
"epoch": 47.73,
"learning_rate": 1e-06,
"loss": 0.0196,
"step": 19550
},
{
"epoch": 47.79,
"learning_rate": 1e-06,
"loss": 0.0191,
"step": 19575
},
{
"epoch": 47.86,
"learning_rate": 1e-06,
"loss": 0.0206,
"step": 19600
},
{
"epoch": 47.92,
"learning_rate": 1e-06,
"loss": 0.0199,
"step": 19625
},
{
"epoch": 47.98,
"learning_rate": 1e-06,
"loss": 0.0205,
"step": 19650
},
{
"epoch": 48.04,
"learning_rate": 1e-06,
"loss": 0.0196,
"step": 19675
},
{
"epoch": 48.1,
"learning_rate": 1e-06,
"loss": 0.0197,
"step": 19700
},
{
"epoch": 48.17,
"learning_rate": 1e-06,
"loss": 0.0136,
"step": 19725
},
{
"epoch": 48.23,
"learning_rate": 1e-06,
"loss": 0.0151,
"step": 19750
},
{
"epoch": 48.29,
"learning_rate": 1e-06,
"loss": 0.0163,
"step": 19775
},
{
"epoch": 48.35,
"learning_rate": 1e-06,
"loss": 0.0188,
"step": 19800
},
{
"epoch": 48.42,
"learning_rate": 1e-06,
"loss": 0.0172,
"step": 19825
},
{
"epoch": 48.48,
"learning_rate": 1e-06,
"loss": 0.0188,
"step": 19850
},
{
"epoch": 48.54,
"learning_rate": 1e-06,
"loss": 0.0178,
"step": 19875
},
{
"epoch": 48.6,
"learning_rate": 1e-06,
"loss": 0.0204,
"step": 19900
},
{
"epoch": 48.66,
"learning_rate": 1e-06,
"loss": 0.0203,
"step": 19925
},
{
"epoch": 48.73,
"learning_rate": 1e-06,
"loss": 0.017,
"step": 19950
},
{
"epoch": 48.79,
"learning_rate": 1e-06,
"loss": 0.0173,
"step": 19975
},
{
"epoch": 48.85,
"learning_rate": 1e-06,
"loss": 0.0161,
"step": 20000
},
{
"epoch": 48.85,
"eval_loss": 0.496826171875,
"eval_runtime": 166.936,
"eval_samples_per_second": 10.16,
"eval_steps_per_second": 0.635,
"eval_wer": 17.830609212481427,
"step": 20000
},
{
"epoch": 48.85,
"step": 20000,
"total_flos": 1.84611545204933e+20,
"train_loss": 0.01866207084655762,
"train_runtime": 23950.5227,
"train_samples_per_second": 26.722,
"train_steps_per_second": 0.835
}
],
"max_steps": 20000,
"num_train_epochs": 50,
"total_flos": 1.84611545204933e+20,
"trial_name": null,
"trial_params": null
}