makhataei's picture
Training in progress, step 1200
5aea8a5
raw
history blame
45.4 kB
{
"best_metric": 45.965834862992175,
"best_model_checkpoint": "/media/makhataei/Backups/Whisper-Small-Common-Voice/checkpoint-6200",
"epoch": 23.93822393822394,
"eval_steps": 100,
"global_step": 6200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 4.600000000000001e-06,
"loss": 3.1025,
"step": 25
},
{
"epoch": 0.19,
"learning_rate": 9.600000000000001e-06,
"loss": 1.3525,
"step": 50
},
{
"epoch": 0.29,
"learning_rate": 9.976884422110554e-06,
"loss": 0.9564,
"step": 75
},
{
"epoch": 0.39,
"learning_rate": 9.95175879396985e-06,
"loss": 0.7495,
"step": 100
},
{
"epoch": 0.39,
"eval_loss": 0.860159158706665,
"eval_runtime": 1426.2695,
"eval_samples_per_second": 6.062,
"eval_steps_per_second": 0.606,
"eval_wer": 58.07742554282218,
"step": 100
},
{
"epoch": 0.48,
"learning_rate": 9.926633165829147e-06,
"loss": 0.5073,
"step": 125
},
{
"epoch": 0.58,
"learning_rate": 9.901507537688444e-06,
"loss": 0.3749,
"step": 150
},
{
"epoch": 0.68,
"learning_rate": 9.87638190954774e-06,
"loss": 0.3247,
"step": 175
},
{
"epoch": 0.77,
"learning_rate": 9.851256281407035e-06,
"loss": 0.3059,
"step": 200
},
{
"epoch": 0.77,
"eval_loss": 0.5538379549980164,
"eval_runtime": 1589.2333,
"eval_samples_per_second": 5.44,
"eval_steps_per_second": 0.544,
"eval_wer": 55.22481850804518,
"step": 200
},
{
"epoch": 0.87,
"learning_rate": 9.826130653266333e-06,
"loss": 0.2963,
"step": 225
},
{
"epoch": 0.97,
"learning_rate": 9.80100502512563e-06,
"loss": 0.2882,
"step": 250
},
{
"epoch": 1.06,
"learning_rate": 9.775879396984925e-06,
"loss": 0.2327,
"step": 275
},
{
"epoch": 1.16,
"learning_rate": 9.750753768844222e-06,
"loss": 0.2002,
"step": 300
},
{
"epoch": 1.16,
"eval_loss": 0.5136818885803223,
"eval_runtime": 1646.1357,
"eval_samples_per_second": 5.252,
"eval_steps_per_second": 0.525,
"eval_wer": 52.46151047609599,
"step": 300
},
{
"epoch": 1.25,
"learning_rate": 9.725628140703518e-06,
"loss": 0.1893,
"step": 325
},
{
"epoch": 1.35,
"learning_rate": 9.700502512562815e-06,
"loss": 0.185,
"step": 350
},
{
"epoch": 1.45,
"learning_rate": 9.675376884422111e-06,
"loss": 0.1847,
"step": 375
},
{
"epoch": 1.54,
"learning_rate": 9.650251256281408e-06,
"loss": 0.1798,
"step": 400
},
{
"epoch": 1.54,
"eval_loss": 0.5102902054786682,
"eval_runtime": 1650.3734,
"eval_samples_per_second": 5.239,
"eval_steps_per_second": 0.524,
"eval_wer": 51.65781945064576,
"step": 400
},
{
"epoch": 1.64,
"learning_rate": 9.625125628140705e-06,
"loss": 0.1804,
"step": 425
},
{
"epoch": 1.74,
"learning_rate": 9.600000000000001e-06,
"loss": 0.1807,
"step": 450
},
{
"epoch": 1.83,
"learning_rate": 9.574874371859298e-06,
"loss": 0.1714,
"step": 475
},
{
"epoch": 1.93,
"learning_rate": 9.549748743718595e-06,
"loss": 0.1678,
"step": 500
},
{
"epoch": 1.93,
"eval_loss": 0.48063144087791443,
"eval_runtime": 1482.5018,
"eval_samples_per_second": 5.832,
"eval_steps_per_second": 0.583,
"eval_wer": 46.55123943708554,
"step": 500
},
{
"epoch": 2.03,
"learning_rate": 9.52462311557789e-06,
"loss": 0.1542,
"step": 525
},
{
"epoch": 2.12,
"learning_rate": 9.499497487437188e-06,
"loss": 0.0929,
"step": 550
},
{
"epoch": 2.22,
"learning_rate": 9.474371859296483e-06,
"loss": 0.0936,
"step": 575
},
{
"epoch": 2.32,
"learning_rate": 9.44924623115578e-06,
"loss": 0.0932,
"step": 600
},
{
"epoch": 2.32,
"eval_loss": 0.49363330006599426,
"eval_runtime": 1993.4903,
"eval_samples_per_second": 4.337,
"eval_steps_per_second": 0.434,
"eval_wer": 47.49549370772767,
"step": 600
},
{
"epoch": 2.41,
"learning_rate": 9.424120603015076e-06,
"loss": 0.0936,
"step": 625
},
{
"epoch": 2.51,
"learning_rate": 9.398994974874373e-06,
"loss": 0.0955,
"step": 650
},
{
"epoch": 2.61,
"learning_rate": 9.373869346733669e-06,
"loss": 0.0871,
"step": 675
},
{
"epoch": 2.7,
"learning_rate": 9.348743718592966e-06,
"loss": 0.0944,
"step": 700
},
{
"epoch": 2.7,
"eval_loss": 0.5119881629943848,
"eval_runtime": 1791.3192,
"eval_samples_per_second": 4.827,
"eval_steps_per_second": 0.483,
"eval_wer": 48.91600932678474,
"step": 700
},
{
"epoch": 2.8,
"learning_rate": 9.323618090452262e-06,
"loss": 0.0927,
"step": 725
},
{
"epoch": 2.9,
"learning_rate": 9.298492462311557e-06,
"loss": 0.0964,
"step": 750
},
{
"epoch": 2.99,
"learning_rate": 9.273366834170856e-06,
"loss": 0.1038,
"step": 775
},
{
"epoch": 3.09,
"learning_rate": 9.248241206030152e-06,
"loss": 0.0493,
"step": 800
},
{
"epoch": 3.09,
"eval_loss": 0.5198288559913635,
"eval_runtime": 1878.4077,
"eval_samples_per_second": 4.603,
"eval_steps_per_second": 0.46,
"eval_wer": 48.26114997271419,
"step": 800
},
{
"epoch": 3.19,
"learning_rate": 9.223115577889447e-06,
"loss": 0.0501,
"step": 825
},
{
"epoch": 3.28,
"learning_rate": 9.197989949748744e-06,
"loss": 0.0458,
"step": 850
},
{
"epoch": 3.38,
"learning_rate": 9.17286432160804e-06,
"loss": 0.0492,
"step": 875
},
{
"epoch": 3.47,
"learning_rate": 9.147738693467337e-06,
"loss": 0.0506,
"step": 900
},
{
"epoch": 3.47,
"eval_loss": 0.5227376222610474,
"eval_runtime": 1759.968,
"eval_samples_per_second": 4.913,
"eval_steps_per_second": 0.491,
"eval_wer": 47.41280944585008,
"step": 900
},
{
"epoch": 3.57,
"learning_rate": 9.122613065326634e-06,
"loss": 0.0494,
"step": 925
},
{
"epoch": 3.67,
"learning_rate": 9.09748743718593e-06,
"loss": 0.051,
"step": 950
},
{
"epoch": 3.76,
"learning_rate": 9.072361809045227e-06,
"loss": 0.0466,
"step": 975
},
{
"epoch": 3.86,
"learning_rate": 9.047236180904524e-06,
"loss": 0.0482,
"step": 1000
},
{
"epoch": 3.86,
"eval_loss": 0.5255833268165588,
"eval_runtime": 1701.3645,
"eval_samples_per_second": 5.082,
"eval_steps_per_second": 0.508,
"eval_wer": 46.94977757933555,
"step": 1000
},
{
"epoch": 3.96,
"learning_rate": 9.02211055276382e-06,
"loss": 0.046,
"step": 1025
},
{
"epoch": 4.05,
"learning_rate": 8.996984924623117e-06,
"loss": 0.0368,
"step": 1050
},
{
"epoch": 4.15,
"learning_rate": 8.971859296482412e-06,
"loss": 0.0233,
"step": 1075
},
{
"epoch": 4.25,
"learning_rate": 8.94673366834171e-06,
"loss": 0.0244,
"step": 1100
},
{
"epoch": 4.25,
"eval_loss": 0.5491495132446289,
"eval_runtime": 1619.0836,
"eval_samples_per_second": 5.34,
"eval_steps_per_second": 0.534,
"eval_wer": 46.084900200095916,
"step": 1100
},
{
"epoch": 4.34,
"learning_rate": 8.921608040201005e-06,
"loss": 0.0258,
"step": 1125
},
{
"epoch": 4.44,
"learning_rate": 8.896482412060302e-06,
"loss": 0.0248,
"step": 1150
},
{
"epoch": 4.54,
"learning_rate": 8.8713567839196e-06,
"loss": 0.0241,
"step": 1175
},
{
"epoch": 4.63,
"learning_rate": 8.846231155778895e-06,
"loss": 0.0272,
"step": 1200
},
{
"epoch": 4.63,
"eval_loss": 0.5620735883712769,
"eval_runtime": 1837.7585,
"eval_samples_per_second": 4.705,
"eval_steps_per_second": 0.471,
"eval_wer": 46.916703874584506,
"step": 1200
},
{
"epoch": 4.73,
"learning_rate": 8.821105527638191e-06,
"loss": 0.0258,
"step": 1225
},
{
"epoch": 4.83,
"learning_rate": 8.795979899497488e-06,
"loss": 0.0237,
"step": 1250
},
{
"epoch": 4.92,
"learning_rate": 8.770854271356785e-06,
"loss": 0.0266,
"step": 1275
},
{
"epoch": 5.02,
"learning_rate": 8.745728643216081e-06,
"loss": 0.0246,
"step": 1300
},
{
"epoch": 5.02,
"eval_loss": 0.5855526328086853,
"eval_runtime": 1908.9589,
"eval_samples_per_second": 4.529,
"eval_steps_per_second": 0.453,
"eval_wer": 48.80355873063122,
"step": 1300
},
{
"epoch": 5.12,
"learning_rate": 8.720603015075378e-06,
"loss": 0.0142,
"step": 1325
},
{
"epoch": 5.21,
"learning_rate": 8.695477386934675e-06,
"loss": 0.014,
"step": 1350
},
{
"epoch": 5.31,
"learning_rate": 8.67035175879397e-06,
"loss": 0.0126,
"step": 1375
},
{
"epoch": 5.41,
"learning_rate": 8.645226130653268e-06,
"loss": 0.0159,
"step": 1400
},
{
"epoch": 5.41,
"eval_loss": 0.6145116090774536,
"eval_runtime": 1843.4778,
"eval_samples_per_second": 4.69,
"eval_steps_per_second": 0.469,
"eval_wer": 48.30414578889054,
"step": 1400
},
{
"epoch": 5.5,
"learning_rate": 8.620100502512564e-06,
"loss": 0.0144,
"step": 1425
},
{
"epoch": 5.6,
"learning_rate": 8.59497487437186e-06,
"loss": 0.0133,
"step": 1450
},
{
"epoch": 5.69,
"learning_rate": 8.569849246231156e-06,
"loss": 0.0151,
"step": 1475
},
{
"epoch": 5.79,
"learning_rate": 8.544723618090453e-06,
"loss": 0.0152,
"step": 1500
},
{
"epoch": 5.79,
"eval_loss": 0.6177700757980347,
"eval_runtime": 1839.895,
"eval_samples_per_second": 4.699,
"eval_steps_per_second": 0.47,
"eval_wer": 47.756775975260865,
"step": 1500
},
{
"epoch": 5.89,
"learning_rate": 8.51959798994975e-06,
"loss": 0.014,
"step": 1525
},
{
"epoch": 5.98,
"learning_rate": 8.494472361809046e-06,
"loss": 0.016,
"step": 1550
},
{
"epoch": 6.08,
"learning_rate": 8.469346733668342e-06,
"loss": 0.0092,
"step": 1575
},
{
"epoch": 6.18,
"learning_rate": 8.444221105527639e-06,
"loss": 0.008,
"step": 1600
},
{
"epoch": 6.18,
"eval_loss": 0.6190542578697205,
"eval_runtime": 1730.2546,
"eval_samples_per_second": 4.997,
"eval_steps_per_second": 0.5,
"eval_wer": 48.06105405897041,
"step": 1600
},
{
"epoch": 6.27,
"learning_rate": 8.419095477386936e-06,
"loss": 0.0079,
"step": 1625
},
{
"epoch": 6.37,
"learning_rate": 8.393969849246232e-06,
"loss": 0.0087,
"step": 1650
},
{
"epoch": 6.47,
"learning_rate": 8.368844221105529e-06,
"loss": 0.0082,
"step": 1675
},
{
"epoch": 6.56,
"learning_rate": 8.343718592964824e-06,
"loss": 0.0077,
"step": 1700
},
{
"epoch": 6.56,
"eval_loss": 0.6308984160423279,
"eval_runtime": 1686.6544,
"eval_samples_per_second": 5.126,
"eval_steps_per_second": 0.513,
"eval_wer": 46.35941194952953,
"step": 1700
},
{
"epoch": 6.66,
"learning_rate": 8.318592964824122e-06,
"loss": 0.0076,
"step": 1725
},
{
"epoch": 6.76,
"learning_rate": 8.293467336683417e-06,
"loss": 0.0095,
"step": 1750
},
{
"epoch": 6.85,
"learning_rate": 8.268341708542714e-06,
"loss": 0.0087,
"step": 1775
},
{
"epoch": 6.95,
"learning_rate": 8.24321608040201e-06,
"loss": 0.0097,
"step": 1800
},
{
"epoch": 6.95,
"eval_loss": 0.6272400617599487,
"eval_runtime": 1856.6423,
"eval_samples_per_second": 4.657,
"eval_steps_per_second": 0.466,
"eval_wer": 47.9138760728283,
"step": 1800
},
{
"epoch": 7.05,
"learning_rate": 8.218090452261307e-06,
"loss": 0.009,
"step": 1825
},
{
"epoch": 7.14,
"learning_rate": 8.192964824120604e-06,
"loss": 0.0053,
"step": 1850
},
{
"epoch": 7.24,
"learning_rate": 8.1678391959799e-06,
"loss": 0.0051,
"step": 1875
},
{
"epoch": 7.34,
"learning_rate": 8.142713567839197e-06,
"loss": 0.0056,
"step": 1900
},
{
"epoch": 7.34,
"eval_loss": 0.6594315767288208,
"eval_runtime": 1711.6251,
"eval_samples_per_second": 5.051,
"eval_steps_per_second": 0.505,
"eval_wer": 46.5562004927982,
"step": 1900
},
{
"epoch": 7.43,
"learning_rate": 8.117587939698493e-06,
"loss": 0.0056,
"step": 1925
},
{
"epoch": 7.53,
"learning_rate": 8.09246231155779e-06,
"loss": 0.0058,
"step": 1950
},
{
"epoch": 7.63,
"learning_rate": 8.067336683417087e-06,
"loss": 0.007,
"step": 1975
},
{
"epoch": 7.72,
"learning_rate": 8.042211055276382e-06,
"loss": 0.0049,
"step": 2000
},
{
"epoch": 7.72,
"eval_loss": 0.6581406593322754,
"eval_runtime": 1917.4042,
"eval_samples_per_second": 4.509,
"eval_steps_per_second": 0.451,
"eval_wer": 47.58313902531792,
"step": 2000
},
{
"epoch": 7.82,
"learning_rate": 8.01708542713568e-06,
"loss": 0.0064,
"step": 2025
},
{
"epoch": 7.92,
"learning_rate": 7.991959798994977e-06,
"loss": 0.0055,
"step": 2050
},
{
"epoch": 8.01,
"learning_rate": 7.966834170854271e-06,
"loss": 0.0055,
"step": 2075
},
{
"epoch": 8.11,
"learning_rate": 7.941708542713568e-06,
"loss": 0.0042,
"step": 2100
},
{
"epoch": 8.11,
"eval_loss": 0.695335865020752,
"eval_runtime": 1836.7482,
"eval_samples_per_second": 4.707,
"eval_steps_per_second": 0.471,
"eval_wer": 48.092474078483896,
"step": 2100
},
{
"epoch": 8.2,
"learning_rate": 7.916582914572865e-06,
"loss": 0.004,
"step": 2125
},
{
"epoch": 8.3,
"learning_rate": 7.891457286432161e-06,
"loss": 0.0046,
"step": 2150
},
{
"epoch": 8.4,
"learning_rate": 7.866331658291458e-06,
"loss": 0.004,
"step": 2175
},
{
"epoch": 8.49,
"learning_rate": 7.841206030150755e-06,
"loss": 0.0038,
"step": 2200
},
{
"epoch": 8.49,
"eval_loss": 0.6856936812400818,
"eval_runtime": 1910.3433,
"eval_samples_per_second": 4.526,
"eval_steps_per_second": 0.453,
"eval_wer": 48.371946883630166,
"step": 2200
},
{
"epoch": 8.59,
"learning_rate": 7.816080402010051e-06,
"loss": 0.0036,
"step": 2225
},
{
"epoch": 8.69,
"learning_rate": 7.790954773869348e-06,
"loss": 0.0038,
"step": 2250
},
{
"epoch": 8.78,
"learning_rate": 7.765829145728644e-06,
"loss": 0.0033,
"step": 2275
},
{
"epoch": 8.88,
"learning_rate": 7.740703517587941e-06,
"loss": 0.0033,
"step": 2300
},
{
"epoch": 8.88,
"eval_loss": 0.6982905268669128,
"eval_runtime": 1950.8321,
"eval_samples_per_second": 4.432,
"eval_steps_per_second": 0.443,
"eval_wer": 49.69654875890923,
"step": 2300
},
{
"epoch": 8.98,
"learning_rate": 7.715577889447236e-06,
"loss": 0.0036,
"step": 2325
},
{
"epoch": 9.07,
"learning_rate": 7.690452261306534e-06,
"loss": 0.0029,
"step": 2350
},
{
"epoch": 9.17,
"learning_rate": 7.66532663316583e-06,
"loss": 0.0027,
"step": 2375
},
{
"epoch": 9.27,
"learning_rate": 7.640201005025126e-06,
"loss": 0.003,
"step": 2400
},
{
"epoch": 9.27,
"eval_loss": 0.7109295129776001,
"eval_runtime": 1863.5651,
"eval_samples_per_second": 4.639,
"eval_steps_per_second": 0.464,
"eval_wer": 48.11727935704718,
"step": 2400
},
{
"epoch": 9.36,
"learning_rate": 7.615075376884423e-06,
"loss": 0.003,
"step": 2425
},
{
"epoch": 9.46,
"learning_rate": 7.589949748743719e-06,
"loss": 0.0025,
"step": 2450
},
{
"epoch": 9.56,
"learning_rate": 7.564824120603016e-06,
"loss": 0.0027,
"step": 2475
},
{
"epoch": 9.65,
"learning_rate": 7.5396984924623115e-06,
"loss": 0.0033,
"step": 2500
},
{
"epoch": 9.65,
"eval_loss": 0.6899213194847107,
"eval_runtime": 1934.8822,
"eval_samples_per_second": 4.468,
"eval_steps_per_second": 0.447,
"eval_wer": 48.236344694150915,
"step": 2500
},
{
"epoch": 9.75,
"learning_rate": 7.514572864321609e-06,
"loss": 0.0025,
"step": 2525
},
{
"epoch": 9.85,
"learning_rate": 7.4894472361809056e-06,
"loss": 0.002,
"step": 2550
},
{
"epoch": 9.94,
"learning_rate": 7.464321608040201e-06,
"loss": 0.0031,
"step": 2575
},
{
"epoch": 10.04,
"learning_rate": 7.439195979899499e-06,
"loss": 0.0027,
"step": 2600
},
{
"epoch": 10.04,
"eval_loss": 0.7074257135391235,
"eval_runtime": 1945.5025,
"eval_samples_per_second": 4.444,
"eval_steps_per_second": 0.445,
"eval_wer": 48.34052686411668,
"step": 2600
},
{
"epoch": 10.14,
"learning_rate": 7.4140703517587946e-06,
"loss": 0.0027,
"step": 2625
},
{
"epoch": 10.23,
"learning_rate": 7.388944723618091e-06,
"loss": 0.0029,
"step": 2650
},
{
"epoch": 10.33,
"learning_rate": 7.363819095477388e-06,
"loss": 0.0032,
"step": 2675
},
{
"epoch": 10.42,
"learning_rate": 7.338693467336684e-06,
"loss": 0.0035,
"step": 2700
},
{
"epoch": 10.42,
"eval_loss": 0.7017741799354553,
"eval_runtime": 1835.0488,
"eval_samples_per_second": 4.712,
"eval_steps_per_second": 0.471,
"eval_wer": 47.10026293595277,
"step": 2700
},
{
"epoch": 10.52,
"learning_rate": 7.313567839195981e-06,
"loss": 0.0024,
"step": 2725
},
{
"epoch": 10.62,
"learning_rate": 7.288442211055277e-06,
"loss": 0.0024,
"step": 2750
},
{
"epoch": 10.71,
"learning_rate": 7.2633165829145734e-06,
"loss": 0.0031,
"step": 2775
},
{
"epoch": 10.81,
"learning_rate": 7.23819095477387e-06,
"loss": 0.0026,
"step": 2800
},
{
"epoch": 10.81,
"eval_loss": 0.7197884917259216,
"eval_runtime": 1907.1753,
"eval_samples_per_second": 4.533,
"eval_steps_per_second": 0.454,
"eval_wer": 47.12506821451605,
"step": 2800
},
{
"epoch": 10.91,
"learning_rate": 7.213065326633167e-06,
"loss": 0.0022,
"step": 2825
},
{
"epoch": 11.0,
"learning_rate": 7.187939698492463e-06,
"loss": 0.0027,
"step": 2850
},
{
"epoch": 11.1,
"learning_rate": 7.162814070351759e-06,
"loss": 0.0022,
"step": 2875
},
{
"epoch": 11.2,
"learning_rate": 7.137688442211056e-06,
"loss": 0.0027,
"step": 2900
},
{
"epoch": 11.2,
"eval_loss": 0.7459501624107361,
"eval_runtime": 2001.7998,
"eval_samples_per_second": 4.319,
"eval_steps_per_second": 0.432,
"eval_wer": 48.27603313985216,
"step": 2900
},
{
"epoch": 11.29,
"learning_rate": 7.112562814070353e-06,
"loss": 0.0024,
"step": 2925
},
{
"epoch": 11.39,
"learning_rate": 7.087437185929649e-06,
"loss": 0.0028,
"step": 2950
},
{
"epoch": 11.49,
"learning_rate": 7.0623115577889456e-06,
"loss": 0.0022,
"step": 2975
},
{
"epoch": 11.58,
"learning_rate": 7.037185929648241e-06,
"loss": 0.0023,
"step": 3000
},
{
"epoch": 11.58,
"eval_loss": 0.7347891926765442,
"eval_runtime": 1876.4701,
"eval_samples_per_second": 4.608,
"eval_steps_per_second": 0.461,
"eval_wer": 47.71543384432207,
"step": 3000
},
{
"epoch": 11.68,
"learning_rate": 7.012060301507538e-06,
"loss": 0.0022,
"step": 3025
},
{
"epoch": 11.78,
"learning_rate": 6.9869346733668354e-06,
"loss": 0.0028,
"step": 3050
},
{
"epoch": 11.87,
"learning_rate": 6.961809045226131e-06,
"loss": 0.0027,
"step": 3075
},
{
"epoch": 11.97,
"learning_rate": 6.936683417085428e-06,
"loss": 0.0023,
"step": 3100
},
{
"epoch": 11.97,
"eval_loss": 0.7387125492095947,
"eval_runtime": 2084.696,
"eval_samples_per_second": 4.147,
"eval_steps_per_second": 0.415,
"eval_wer": 48.00813613136876,
"step": 3100
},
{
"epoch": 12.07,
"learning_rate": 6.911557788944724e-06,
"loss": 0.002,
"step": 3125
},
{
"epoch": 12.16,
"learning_rate": 6.886432160804021e-06,
"loss": 0.0019,
"step": 3150
},
{
"epoch": 12.26,
"learning_rate": 6.861306532663318e-06,
"loss": 0.0028,
"step": 3175
},
{
"epoch": 12.36,
"learning_rate": 6.8361809045226135e-06,
"loss": 0.0024,
"step": 3200
},
{
"epoch": 12.36,
"eval_loss": 0.7199321985244751,
"eval_runtime": 1992.5899,
"eval_samples_per_second": 4.339,
"eval_steps_per_second": 0.434,
"eval_wer": 46.86874700269551,
"step": 3200
},
{
"epoch": 12.45,
"learning_rate": 6.81105527638191e-06,
"loss": 0.0024,
"step": 3225
},
{
"epoch": 12.55,
"learning_rate": 6.785929648241206e-06,
"loss": 0.0019,
"step": 3250
},
{
"epoch": 12.64,
"learning_rate": 6.760804020100503e-06,
"loss": 0.0024,
"step": 3275
},
{
"epoch": 12.74,
"learning_rate": 6.735678391959799e-06,
"loss": 0.0033,
"step": 3300
},
{
"epoch": 12.74,
"eval_loss": 0.7249542474746704,
"eval_runtime": 2175.7785,
"eval_samples_per_second": 3.974,
"eval_steps_per_second": 0.398,
"eval_wer": 48.68945444924013,
"step": 3300
},
{
"epoch": 12.84,
"learning_rate": 6.710552763819096e-06,
"loss": 0.0021,
"step": 3325
},
{
"epoch": 12.93,
"learning_rate": 6.685427135678393e-06,
"loss": 0.0014,
"step": 3350
},
{
"epoch": 13.03,
"learning_rate": 6.660301507537689e-06,
"loss": 0.0011,
"step": 3375
},
{
"epoch": 13.13,
"learning_rate": 6.6351758793969856e-06,
"loss": 0.0017,
"step": 3400
},
{
"epoch": 13.13,
"eval_loss": 0.7241615653038025,
"eval_runtime": 2420.7565,
"eval_samples_per_second": 3.572,
"eval_steps_per_second": 0.357,
"eval_wer": 49.044996775313784,
"step": 3400
},
{
"epoch": 13.22,
"learning_rate": 6.610050251256281e-06,
"loss": 0.0019,
"step": 3425
},
{
"epoch": 13.32,
"learning_rate": 6.584924623115578e-06,
"loss": 0.0014,
"step": 3450
},
{
"epoch": 13.42,
"learning_rate": 6.5597989949748754e-06,
"loss": 0.0011,
"step": 3475
},
{
"epoch": 13.51,
"learning_rate": 6.534673366834171e-06,
"loss": 0.0016,
"step": 3500
},
{
"epoch": 13.51,
"eval_loss": 0.7358579039573669,
"eval_runtime": 2057.6253,
"eval_samples_per_second": 4.202,
"eval_steps_per_second": 0.42,
"eval_wer": 48.70103024590299,
"step": 3500
},
{
"epoch": 13.61,
"learning_rate": 6.509547738693468e-06,
"loss": 0.002,
"step": 3525
},
{
"epoch": 13.71,
"learning_rate": 6.484422110552764e-06,
"loss": 0.0015,
"step": 3550
},
{
"epoch": 13.8,
"learning_rate": 6.459296482412061e-06,
"loss": 0.0024,
"step": 3575
},
{
"epoch": 13.9,
"learning_rate": 6.434170854271358e-06,
"loss": 0.0022,
"step": 3600
},
{
"epoch": 13.9,
"eval_loss": 0.7220202088356018,
"eval_runtime": 2243.6869,
"eval_samples_per_second": 3.853,
"eval_steps_per_second": 0.386,
"eval_wer": 48.1371235798978,
"step": 3600
},
{
"epoch": 14.0,
"learning_rate": 6.4090452261306535e-06,
"loss": 0.0021,
"step": 3625
},
{
"epoch": 14.09,
"learning_rate": 6.38391959798995e-06,
"loss": 0.0013,
"step": 3650
},
{
"epoch": 14.19,
"learning_rate": 6.358793969849246e-06,
"loss": 0.0014,
"step": 3675
},
{
"epoch": 14.29,
"learning_rate": 6.333668341708543e-06,
"loss": 0.0016,
"step": 3700
},
{
"epoch": 14.29,
"eval_loss": 0.7430799007415771,
"eval_runtime": 1822.7961,
"eval_samples_per_second": 4.743,
"eval_steps_per_second": 0.475,
"eval_wer": 46.75629640654198,
"step": 3700
},
{
"epoch": 14.38,
"learning_rate": 6.30854271356784e-06,
"loss": 0.0008,
"step": 3725
},
{
"epoch": 14.48,
"learning_rate": 6.283417085427136e-06,
"loss": 0.0014,
"step": 3750
},
{
"epoch": 14.58,
"learning_rate": 6.258291457286433e-06,
"loss": 0.001,
"step": 3775
},
{
"epoch": 14.67,
"learning_rate": 6.233165829145729e-06,
"loss": 0.0012,
"step": 3800
},
{
"epoch": 14.67,
"eval_loss": 0.7563586235046387,
"eval_runtime": 2100.7282,
"eval_samples_per_second": 4.116,
"eval_steps_per_second": 0.412,
"eval_wer": 47.014271303600076,
"step": 3800
},
{
"epoch": 14.77,
"learning_rate": 6.208040201005026e-06,
"loss": 0.0017,
"step": 3825
},
{
"epoch": 14.86,
"learning_rate": 6.182914572864322e-06,
"loss": 0.0015,
"step": 3850
},
{
"epoch": 14.96,
"learning_rate": 6.157788944723618e-06,
"loss": 0.0012,
"step": 3875
},
{
"epoch": 15.06,
"learning_rate": 6.1326633165829155e-06,
"loss": 0.0014,
"step": 3900
},
{
"epoch": 15.06,
"eval_loss": 0.7769792079925537,
"eval_runtime": 2049.0992,
"eval_samples_per_second": 4.219,
"eval_steps_per_second": 0.422,
"eval_wer": 47.02419341502539,
"step": 3900
},
{
"epoch": 15.15,
"learning_rate": 6.107537688442211e-06,
"loss": 0.0011,
"step": 3925
},
{
"epoch": 15.25,
"learning_rate": 6.082412060301508e-06,
"loss": 0.001,
"step": 3950
},
{
"epoch": 15.35,
"learning_rate": 6.057286432160805e-06,
"loss": 0.0015,
"step": 3975
},
{
"epoch": 15.44,
"learning_rate": 6.032160804020101e-06,
"loss": 0.0008,
"step": 4000
},
{
"epoch": 15.44,
"eval_loss": 0.8116338849067688,
"eval_runtime": 2088.1418,
"eval_samples_per_second": 4.141,
"eval_steps_per_second": 0.414,
"eval_wer": 48.46951431264573,
"step": 4000
},
{
"epoch": 15.54,
"learning_rate": 6.007035175879398e-06,
"loss": 0.0015,
"step": 4025
},
{
"epoch": 15.64,
"learning_rate": 5.9819095477386935e-06,
"loss": 0.0008,
"step": 4050
},
{
"epoch": 15.73,
"learning_rate": 5.95678391959799e-06,
"loss": 0.0016,
"step": 4075
},
{
"epoch": 15.83,
"learning_rate": 5.9316582914572876e-06,
"loss": 0.0015,
"step": 4100
},
{
"epoch": 15.83,
"eval_loss": 0.7622503638267517,
"eval_runtime": 2010.7151,
"eval_samples_per_second": 4.3,
"eval_steps_per_second": 0.43,
"eval_wer": 48.092474078483896,
"step": 4100
},
{
"epoch": 15.93,
"learning_rate": 5.906532663316583e-06,
"loss": 0.0016,
"step": 4125
},
{
"epoch": 16.02,
"learning_rate": 5.88140703517588e-06,
"loss": 0.002,
"step": 4150
},
{
"epoch": 16.12,
"learning_rate": 5.856281407035176e-06,
"loss": 0.0015,
"step": 4175
},
{
"epoch": 16.22,
"learning_rate": 5.831155778894473e-06,
"loss": 0.002,
"step": 4200
},
{
"epoch": 16.22,
"eval_loss": 0.7698885202407837,
"eval_runtime": 1914.0687,
"eval_samples_per_second": 4.517,
"eval_steps_per_second": 0.452,
"eval_wer": 47.72535595574738,
"step": 4200
},
{
"epoch": 16.31,
"learning_rate": 5.80603015075377e-06,
"loss": 0.0013,
"step": 4225
},
{
"epoch": 16.41,
"learning_rate": 5.780904522613066e-06,
"loss": 0.0017,
"step": 4250
},
{
"epoch": 16.51,
"learning_rate": 5.755778894472362e-06,
"loss": 0.0023,
"step": 4275
},
{
"epoch": 16.6,
"learning_rate": 5.730653266331658e-06,
"loss": 0.001,
"step": 4300
},
{
"epoch": 16.6,
"eval_loss": 0.7630722522735596,
"eval_runtime": 1923.0045,
"eval_samples_per_second": 4.496,
"eval_steps_per_second": 0.45,
"eval_wer": 46.87536174364572,
"step": 4300
},
{
"epoch": 16.7,
"learning_rate": 5.7055276381909555e-06,
"loss": 0.0022,
"step": 4325
},
{
"epoch": 16.8,
"learning_rate": 5.680402010050252e-06,
"loss": 0.0012,
"step": 4350
},
{
"epoch": 16.89,
"learning_rate": 5.655276381909548e-06,
"loss": 0.0018,
"step": 4375
},
{
"epoch": 16.99,
"learning_rate": 5.6301507537688445e-06,
"loss": 0.0009,
"step": 4400
},
{
"epoch": 16.99,
"eval_loss": 0.7591461539268494,
"eval_runtime": 1922.4876,
"eval_samples_per_second": 4.497,
"eval_steps_per_second": 0.45,
"eval_wer": 48.416596385044066,
"step": 4400
},
{
"epoch": 17.08,
"learning_rate": 5.605025125628141e-06,
"loss": 0.0012,
"step": 4425
},
{
"epoch": 17.18,
"learning_rate": 5.579899497487438e-06,
"loss": 0.0009,
"step": 4450
},
{
"epoch": 17.28,
"learning_rate": 5.554773869346734e-06,
"loss": 0.0006,
"step": 4475
},
{
"epoch": 17.37,
"learning_rate": 5.52964824120603e-06,
"loss": 0.0008,
"step": 4500
},
{
"epoch": 17.37,
"eval_loss": 0.779658317565918,
"eval_runtime": 1935.42,
"eval_samples_per_second": 4.467,
"eval_steps_per_second": 0.447,
"eval_wer": 47.17798614211771,
"step": 4500
},
{
"epoch": 17.47,
"learning_rate": 5.504522613065328e-06,
"loss": 0.0022,
"step": 4525
},
{
"epoch": 17.57,
"learning_rate": 5.479396984924623e-06,
"loss": 0.0008,
"step": 4550
},
{
"epoch": 17.66,
"learning_rate": 5.45427135678392e-06,
"loss": 0.0016,
"step": 4575
},
{
"epoch": 17.76,
"learning_rate": 5.429145728643217e-06,
"loss": 0.0008,
"step": 4600
},
{
"epoch": 17.76,
"eval_loss": 0.7850877046585083,
"eval_runtime": 1838.7142,
"eval_samples_per_second": 4.702,
"eval_steps_per_second": 0.47,
"eval_wer": 46.76621851796729,
"step": 4600
},
{
"epoch": 17.86,
"learning_rate": 5.404020100502513e-06,
"loss": 0.0011,
"step": 4625
},
{
"epoch": 17.95,
"learning_rate": 5.37889447236181e-06,
"loss": 0.0007,
"step": 4650
},
{
"epoch": 18.05,
"learning_rate": 5.353768844221106e-06,
"loss": 0.0015,
"step": 4675
},
{
"epoch": 18.15,
"learning_rate": 5.328643216080402e-06,
"loss": 0.0007,
"step": 4700
},
{
"epoch": 18.15,
"eval_loss": 0.7897337079048157,
"eval_runtime": 1943.7204,
"eval_samples_per_second": 4.448,
"eval_steps_per_second": 0.445,
"eval_wer": 48.315721585553405,
"step": 4700
},
{
"epoch": 18.24,
"learning_rate": 5.3035175879397e-06,
"loss": 0.0007,
"step": 4725
},
{
"epoch": 18.34,
"learning_rate": 5.2783919597989955e-06,
"loss": 0.0006,
"step": 4750
},
{
"epoch": 18.44,
"learning_rate": 5.253266331658292e-06,
"loss": 0.0007,
"step": 4775
},
{
"epoch": 18.53,
"learning_rate": 5.228140703517588e-06,
"loss": 0.0006,
"step": 4800
},
{
"epoch": 18.53,
"eval_loss": 0.7760252952575684,
"eval_runtime": 1927.3698,
"eval_samples_per_second": 4.486,
"eval_steps_per_second": 0.449,
"eval_wer": 48.806866101106316,
"step": 4800
},
{
"epoch": 18.63,
"learning_rate": 5.2030150753768845e-06,
"loss": 0.001,
"step": 4825
},
{
"epoch": 18.73,
"learning_rate": 5.177889447236182e-06,
"loss": 0.0008,
"step": 4850
},
{
"epoch": 18.82,
"learning_rate": 5.152763819095478e-06,
"loss": 0.0017,
"step": 4875
},
{
"epoch": 18.92,
"learning_rate": 5.127638190954774e-06,
"loss": 0.0016,
"step": 4900
},
{
"epoch": 18.92,
"eval_loss": 0.7762993574142456,
"eval_runtime": 2000.3946,
"eval_samples_per_second": 4.322,
"eval_steps_per_second": 0.432,
"eval_wer": 47.97175505614261,
"step": 4900
},
{
"epoch": 19.02,
"learning_rate": 5.10251256281407e-06,
"loss": 0.0011,
"step": 4925
},
{
"epoch": 19.11,
"learning_rate": 5.077386934673368e-06,
"loss": 0.0006,
"step": 4950
},
{
"epoch": 19.21,
"learning_rate": 5.052261306532664e-06,
"loss": 0.0006,
"step": 4975
},
{
"epoch": 19.31,
"learning_rate": 5.02713567839196e-06,
"loss": 0.0009,
"step": 5000
},
{
"epoch": 19.31,
"eval_loss": 0.8151038289070129,
"eval_runtime": 1941.3748,
"eval_samples_per_second": 4.454,
"eval_steps_per_second": 0.446,
"eval_wer": 48.158621487985975,
"step": 5000
},
{
"epoch": 19.4,
"learning_rate": 5.002010050251257e-06,
"loss": 0.0008,
"step": 5025
},
{
"epoch": 19.5,
"learning_rate": 4.976884422110553e-06,
"loss": 0.0012,
"step": 5050
},
{
"epoch": 19.59,
"learning_rate": 4.95175879396985e-06,
"loss": 0.001,
"step": 5075
},
{
"epoch": 19.69,
"learning_rate": 4.9266331658291465e-06,
"loss": 0.0004,
"step": 5100
},
{
"epoch": 19.69,
"eval_loss": 0.7967365384101868,
"eval_runtime": 1771.7869,
"eval_samples_per_second": 4.88,
"eval_steps_per_second": 0.488,
"eval_wer": 47.55668006151709,
"step": 5100
},
{
"epoch": 19.79,
"learning_rate": 4.901507537688442e-06,
"loss": 0.0007,
"step": 5125
},
{
"epoch": 19.88,
"learning_rate": 4.876381909547739e-06,
"loss": 0.0007,
"step": 5150
},
{
"epoch": 19.98,
"learning_rate": 4.8512562814070355e-06,
"loss": 0.0009,
"step": 5175
},
{
"epoch": 20.08,
"learning_rate": 4.826130653266332e-06,
"loss": 0.0007,
"step": 5200
},
{
"epoch": 20.08,
"eval_loss": 0.8094375729560852,
"eval_runtime": 1764.7964,
"eval_samples_per_second": 4.899,
"eval_steps_per_second": 0.49,
"eval_wer": 46.78606274081791,
"step": 5200
},
{
"epoch": 20.17,
"learning_rate": 4.801005025125629e-06,
"loss": 0.0007,
"step": 5225
},
{
"epoch": 20.27,
"learning_rate": 4.7758793969849245e-06,
"loss": 0.0012,
"step": 5250
},
{
"epoch": 20.37,
"learning_rate": 4.750753768844221e-06,
"loss": 0.0008,
"step": 5275
},
{
"epoch": 20.46,
"learning_rate": 4.725628140703518e-06,
"loss": 0.001,
"step": 5300
},
{
"epoch": 20.46,
"eval_loss": 0.8205662369728088,
"eval_runtime": 1833.049,
"eval_samples_per_second": 4.717,
"eval_steps_per_second": 0.472,
"eval_wer": 47.15648823402954,
"step": 5300
},
{
"epoch": 20.56,
"learning_rate": 4.700502512562814e-06,
"loss": 0.0013,
"step": 5325
},
{
"epoch": 20.66,
"learning_rate": 4.675376884422111e-06,
"loss": 0.0007,
"step": 5350
},
{
"epoch": 20.75,
"learning_rate": 4.650251256281408e-06,
"loss": 0.0007,
"step": 5375
},
{
"epoch": 20.85,
"learning_rate": 4.625125628140703e-06,
"loss": 0.0008,
"step": 5400
},
{
"epoch": 20.85,
"eval_loss": 0.8015366792678833,
"eval_runtime": 1825.3401,
"eval_samples_per_second": 4.737,
"eval_steps_per_second": 0.474,
"eval_wer": 47.621173785781615,
"step": 5400
},
{
"epoch": 20.95,
"learning_rate": 4.600000000000001e-06,
"loss": 0.0007,
"step": 5425
},
{
"epoch": 21.04,
"learning_rate": 4.574874371859297e-06,
"loss": 0.0006,
"step": 5450
},
{
"epoch": 21.14,
"learning_rate": 4.549748743718593e-06,
"loss": 0.0005,
"step": 5475
},
{
"epoch": 21.24,
"learning_rate": 4.52462311557789e-06,
"loss": 0.0004,
"step": 5500
},
{
"epoch": 21.24,
"eval_loss": 0.8104465007781982,
"eval_runtime": 1735.3093,
"eval_samples_per_second": 4.982,
"eval_steps_per_second": 0.498,
"eval_wer": 47.220981958294054,
"step": 5500
},
{
"epoch": 21.33,
"learning_rate": 4.499497487437186e-06,
"loss": 0.0004,
"step": 5525
},
{
"epoch": 21.43,
"learning_rate": 4.474371859296483e-06,
"loss": 0.0005,
"step": 5550
},
{
"epoch": 21.53,
"learning_rate": 4.44924623115578e-06,
"loss": 0.001,
"step": 5575
},
{
"epoch": 21.62,
"learning_rate": 4.4241206030150755e-06,
"loss": 0.0003,
"step": 5600
},
{
"epoch": 21.62,
"eval_loss": 0.8020026683807373,
"eval_runtime": 1798.7419,
"eval_samples_per_second": 4.807,
"eval_steps_per_second": 0.481,
"eval_wer": 48.145392006085565,
"step": 5600
},
{
"epoch": 21.72,
"learning_rate": 4.398994974874372e-06,
"loss": 0.0008,
"step": 5625
},
{
"epoch": 21.81,
"learning_rate": 4.373869346733669e-06,
"loss": 0.0004,
"step": 5650
},
{
"epoch": 21.91,
"learning_rate": 4.348743718592965e-06,
"loss": 0.0006,
"step": 5675
},
{
"epoch": 22.01,
"learning_rate": 4.323618090452262e-06,
"loss": 0.0004,
"step": 5700
},
{
"epoch": 22.01,
"eval_loss": 0.8294846415519714,
"eval_runtime": 1827.1151,
"eval_samples_per_second": 4.732,
"eval_steps_per_second": 0.473,
"eval_wer": 46.94150915314779,
"step": 5700
},
{
"epoch": 22.1,
"learning_rate": 4.298492462311558e-06,
"loss": 0.0002,
"step": 5725
},
{
"epoch": 22.2,
"learning_rate": 4.273366834170854e-06,
"loss": 0.0003,
"step": 5750
},
{
"epoch": 22.3,
"learning_rate": 4.248241206030151e-06,
"loss": 0.0002,
"step": 5775
},
{
"epoch": 22.39,
"learning_rate": 4.223115577889448e-06,
"loss": 0.0004,
"step": 5800
},
{
"epoch": 22.39,
"eval_loss": 0.822780191898346,
"eval_runtime": 1778.1094,
"eval_samples_per_second": 4.862,
"eval_steps_per_second": 0.486,
"eval_wer": 46.61077210563741,
"step": 5800
},
{
"epoch": 22.49,
"learning_rate": 4.197989949748744e-06,
"loss": 0.0003,
"step": 5825
},
{
"epoch": 22.59,
"learning_rate": 4.172864321608041e-06,
"loss": 0.0003,
"step": 5850
},
{
"epoch": 22.68,
"learning_rate": 4.147738693467337e-06,
"loss": 0.0004,
"step": 5875
},
{
"epoch": 22.78,
"learning_rate": 4.122613065326633e-06,
"loss": 0.0005,
"step": 5900
},
{
"epoch": 22.78,
"eval_loss": 0.8386306166648865,
"eval_runtime": 1804.5855,
"eval_samples_per_second": 4.791,
"eval_steps_per_second": 0.479,
"eval_wer": 48.09743513419656,
"step": 5900
},
{
"epoch": 22.88,
"learning_rate": 4.09748743718593e-06,
"loss": 0.0007,
"step": 5925
},
{
"epoch": 22.97,
"learning_rate": 4.0723618090452265e-06,
"loss": 0.0013,
"step": 5950
},
{
"epoch": 23.07,
"learning_rate": 4.047236180904523e-06,
"loss": 0.0008,
"step": 5975
},
{
"epoch": 23.17,
"learning_rate": 4.02211055276382e-06,
"loss": 0.0007,
"step": 6000
},
{
"epoch": 23.17,
"eval_loss": 0.8414955735206604,
"eval_runtime": 1746.5835,
"eval_samples_per_second": 4.95,
"eval_steps_per_second": 0.495,
"eval_wer": 46.61407947611252,
"step": 6000
},
{
"epoch": 23.26,
"learning_rate": 3.997989949748744e-06,
"loss": 0.0004,
"step": 6025
},
{
"epoch": 23.36,
"learning_rate": 3.97286432160804e-06,
"loss": 0.0004,
"step": 6050
},
{
"epoch": 23.46,
"learning_rate": 3.947738693467337e-06,
"loss": 0.0006,
"step": 6075
},
{
"epoch": 23.55,
"learning_rate": 3.9226130653266335e-06,
"loss": 0.0003,
"step": 6100
},
{
"epoch": 23.55,
"eval_loss": 0.8283448815345764,
"eval_runtime": 1775.2653,
"eval_samples_per_second": 4.87,
"eval_steps_per_second": 0.487,
"eval_wer": 46.32633824477848,
"step": 6100
},
{
"epoch": 23.65,
"learning_rate": 3.89748743718593e-06,
"loss": 0.0003,
"step": 6125
},
{
"epoch": 23.75,
"learning_rate": 3.872361809045227e-06,
"loss": 0.0011,
"step": 6150
},
{
"epoch": 23.84,
"learning_rate": 3.8472361809045225e-06,
"loss": 0.0003,
"step": 6175
},
{
"epoch": 23.94,
"learning_rate": 3.822110552763819e-06,
"loss": 0.0005,
"step": 6200
},
{
"epoch": 23.94,
"eval_loss": 0.8341825604438782,
"eval_runtime": 1685.2846,
"eval_samples_per_second": 5.13,
"eval_steps_per_second": 0.513,
"eval_wer": 45.965834862992175,
"step": 6200
}
],
"logging_steps": 25,
"max_steps": 10000,
"num_train_epochs": 39,
"save_steps": 100,
"total_flos": 1.001636636700672e+20,
"trial_name": null,
"trial_params": null
}