wav2vec2-xls-r-1b-npsc-bokmaal / trainer_state.json
versae's picture
End of training
f07e473
raw
history blame
39.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.999677731227845,
"global_step": 23265,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 4.800000000000001e-06,
"loss": 5.4592,
"step": 100
},
{
"epoch": 0.13,
"learning_rate": 9.800000000000001e-06,
"loss": 2.9124,
"step": 200
},
{
"epoch": 0.19,
"learning_rate": 1.48e-05,
"loss": 2.3526,
"step": 300
},
{
"epoch": 0.26,
"learning_rate": 1.9800000000000004e-05,
"loss": 1.1889,
"step": 400
},
{
"epoch": 0.32,
"learning_rate": 2.48e-05,
"loss": 0.8361,
"step": 500
},
{
"epoch": 0.32,
"eval_loss": 0.6304140686988831,
"eval_runtime": 553.4275,
"eval_samples_per_second": 9.824,
"eval_steps_per_second": 0.614,
"eval_wer": 0.4970241305264396,
"step": 500
},
{
"epoch": 0.39,
"learning_rate": 2.98e-05,
"loss": 0.6758,
"step": 600
},
{
"epoch": 0.45,
"learning_rate": 3.48e-05,
"loss": 0.6902,
"step": 700
},
{
"epoch": 0.52,
"learning_rate": 3.9800000000000005e-05,
"loss": 0.6335,
"step": 800
},
{
"epoch": 0.58,
"learning_rate": 4.4800000000000005e-05,
"loss": 0.5867,
"step": 900
},
{
"epoch": 0.64,
"learning_rate": 4.9800000000000004e-05,
"loss": 0.5703,
"step": 1000
},
{
"epoch": 0.64,
"eval_loss": 0.3195364475250244,
"eval_runtime": 552.5688,
"eval_samples_per_second": 9.839,
"eval_steps_per_second": 0.615,
"eval_wer": 0.27750595664153277,
"step": 1000
},
{
"epoch": 0.71,
"learning_rate": 5.4800000000000004e-05,
"loss": 0.5762,
"step": 1100
},
{
"epoch": 0.77,
"learning_rate": 5.9800000000000003e-05,
"loss": 0.5656,
"step": 1200
},
{
"epoch": 0.84,
"learning_rate": 6.48e-05,
"loss": 0.5864,
"step": 1300
},
{
"epoch": 0.9,
"learning_rate": 6.975e-05,
"loss": 0.5116,
"step": 1400
},
{
"epoch": 0.97,
"learning_rate": 7.475000000000001e-05,
"loss": 0.5451,
"step": 1500
},
{
"epoch": 0.97,
"eval_loss": 0.2699669599533081,
"eval_runtime": 552.0028,
"eval_samples_per_second": 9.85,
"eval_steps_per_second": 0.616,
"eval_wer": 0.2246070577622638,
"step": 1500
},
{
"epoch": 1.03,
"learning_rate": 7.975e-05,
"loss": 0.5134,
"step": 1600
},
{
"epoch": 1.1,
"learning_rate": 8.475000000000001e-05,
"loss": 0.467,
"step": 1700
},
{
"epoch": 1.16,
"learning_rate": 8.975e-05,
"loss": 0.4769,
"step": 1800
},
{
"epoch": 1.22,
"learning_rate": 9.47e-05,
"loss": 0.461,
"step": 1900
},
{
"epoch": 1.29,
"learning_rate": 9.970000000000001e-05,
"loss": 0.47,
"step": 2000
},
{
"epoch": 1.29,
"eval_loss": 0.25637131929397583,
"eval_runtime": 552.4533,
"eval_samples_per_second": 9.842,
"eval_steps_per_second": 0.615,
"eval_wer": 0.23294145332248228,
"step": 2000
},
{
"epoch": 1.35,
"learning_rate": 9.955795908770281e-05,
"loss": 0.4506,
"step": 2100
},
{
"epoch": 1.42,
"learning_rate": 9.908770279802493e-05,
"loss": 0.4154,
"step": 2200
},
{
"epoch": 1.48,
"learning_rate": 9.861744650834705e-05,
"loss": 0.4377,
"step": 2300
},
{
"epoch": 1.55,
"learning_rate": 9.814719021866918e-05,
"loss": 0.4723,
"step": 2400
},
{
"epoch": 1.61,
"learning_rate": 9.767693392899132e-05,
"loss": 0.4063,
"step": 2500
},
{
"epoch": 1.61,
"eval_loss": 0.2459278106689453,
"eval_runtime": 552.824,
"eval_samples_per_second": 9.835,
"eval_steps_per_second": 0.615,
"eval_wer": 0.20988949572004276,
"step": 2500
},
{
"epoch": 1.68,
"learning_rate": 9.720667763931343e-05,
"loss": 0.3807,
"step": 2600
},
{
"epoch": 1.74,
"learning_rate": 9.673642134963555e-05,
"loss": 0.4094,
"step": 2700
},
{
"epoch": 1.81,
"learning_rate": 9.626616505995768e-05,
"loss": 0.401,
"step": 2800
},
{
"epoch": 1.87,
"learning_rate": 9.579590877027981e-05,
"loss": 0.3849,
"step": 2900
},
{
"epoch": 1.93,
"learning_rate": 9.532565248060194e-05,
"loss": 0.374,
"step": 3000
},
{
"epoch": 1.93,
"eval_loss": 0.21745240688323975,
"eval_runtime": 553.0877,
"eval_samples_per_second": 9.83,
"eval_steps_per_second": 0.615,
"eval_wer": 0.18940649298439996,
"step": 3000
},
{
"epoch": 2.0,
"learning_rate": 9.485539619092406e-05,
"loss": 0.3684,
"step": 3100
},
{
"epoch": 2.06,
"learning_rate": 9.438513990124619e-05,
"loss": 0.3658,
"step": 3200
},
{
"epoch": 2.13,
"learning_rate": 9.391488361156831e-05,
"loss": 0.3432,
"step": 3300
},
{
"epoch": 2.19,
"learning_rate": 9.344462732189043e-05,
"loss": 0.336,
"step": 3400
},
{
"epoch": 2.26,
"learning_rate": 9.297437103221256e-05,
"loss": 0.3297,
"step": 3500
},
{
"epoch": 2.26,
"eval_loss": 0.20356999337673187,
"eval_runtime": 553.8668,
"eval_samples_per_second": 9.816,
"eval_steps_per_second": 0.614,
"eval_wer": 0.17550276015570612,
"step": 3500
},
{
"epoch": 2.32,
"learning_rate": 9.250411474253468e-05,
"loss": 0.3177,
"step": 3600
},
{
"epoch": 2.39,
"learning_rate": 9.203385845285682e-05,
"loss": 0.3111,
"step": 3700
},
{
"epoch": 2.45,
"learning_rate": 9.156360216317894e-05,
"loss": 0.3209,
"step": 3800
},
{
"epoch": 2.51,
"learning_rate": 9.109334587350105e-05,
"loss": 0.3439,
"step": 3900
},
{
"epoch": 2.58,
"learning_rate": 9.062308958382318e-05,
"loss": 0.3145,
"step": 4000
},
{
"epoch": 2.58,
"eval_loss": 0.1957111805677414,
"eval_runtime": 553.0089,
"eval_samples_per_second": 9.832,
"eval_steps_per_second": 0.615,
"eval_wer": 0.1757282790943944,
"step": 4000
},
{
"epoch": 2.64,
"learning_rate": 9.01575358570421e-05,
"loss": 0.3077,
"step": 4100
},
{
"epoch": 2.71,
"learning_rate": 8.968727956736421e-05,
"loss": 0.3147,
"step": 4200
},
{
"epoch": 2.77,
"learning_rate": 8.921702327768633e-05,
"loss": 0.3232,
"step": 4300
},
{
"epoch": 2.84,
"learning_rate": 8.874676698800847e-05,
"loss": 0.3195,
"step": 4400
},
{
"epoch": 2.9,
"learning_rate": 8.82765106983306e-05,
"loss": 0.3989,
"step": 4500
},
{
"epoch": 2.9,
"eval_loss": 0.19227643311023712,
"eval_runtime": 554.5698,
"eval_samples_per_second": 9.804,
"eval_steps_per_second": 0.613,
"eval_wer": 0.17229646915783384,
"step": 4500
},
{
"epoch": 2.97,
"learning_rate": 8.780625440865272e-05,
"loss": 0.3035,
"step": 4600
},
{
"epoch": 3.03,
"learning_rate": 8.733599811897484e-05,
"loss": 0.2997,
"step": 4700
},
{
"epoch": 3.09,
"learning_rate": 8.686574182929698e-05,
"loss": 0.2798,
"step": 4800
},
{
"epoch": 3.16,
"learning_rate": 8.639548553961909e-05,
"loss": 0.274,
"step": 4900
},
{
"epoch": 3.22,
"learning_rate": 8.592522924994122e-05,
"loss": 0.271,
"step": 5000
},
{
"epoch": 3.22,
"eval_loss": 0.18889272212982178,
"eval_runtime": 553.6667,
"eval_samples_per_second": 9.82,
"eval_steps_per_second": 0.614,
"eval_wer": 0.16491317520860502,
"step": 5000
},
{
"epoch": 3.29,
"learning_rate": 8.545497296026334e-05,
"loss": 0.2726,
"step": 5100
},
{
"epoch": 3.35,
"learning_rate": 8.498471667058548e-05,
"loss": 0.283,
"step": 5200
},
{
"epoch": 3.42,
"learning_rate": 8.45144603809076e-05,
"loss": 0.2837,
"step": 5300
},
{
"epoch": 3.48,
"learning_rate": 8.404420409122973e-05,
"loss": 0.2915,
"step": 5400
},
{
"epoch": 3.55,
"learning_rate": 8.357394780155185e-05,
"loss": 0.2758,
"step": 5500
},
{
"epoch": 3.55,
"eval_loss": 0.17681501805782318,
"eval_runtime": 557.2307,
"eval_samples_per_second": 9.757,
"eval_steps_per_second": 0.61,
"eval_wer": 0.15884377420651652,
"step": 5500
},
{
"epoch": 3.61,
"learning_rate": 8.310369151187397e-05,
"loss": 0.2699,
"step": 5600
},
{
"epoch": 3.67,
"learning_rate": 8.26334352221961e-05,
"loss": 0.2659,
"step": 5700
},
{
"epoch": 3.74,
"learning_rate": 8.216317893251822e-05,
"loss": 0.2759,
"step": 5800
},
{
"epoch": 3.8,
"learning_rate": 8.169292264284035e-05,
"loss": 0.2632,
"step": 5900
},
{
"epoch": 3.87,
"learning_rate": 8.122736891605925e-05,
"loss": 0.2683,
"step": 6000
},
{
"epoch": 3.87,
"eval_loss": 0.17198732495307922,
"eval_runtime": 555.6592,
"eval_samples_per_second": 9.785,
"eval_steps_per_second": 0.612,
"eval_wer": 0.1533822938217616,
"step": 6000
},
{
"epoch": 3.93,
"learning_rate": 8.075711262638138e-05,
"loss": 0.2628,
"step": 6100
},
{
"epoch": 4.0,
"learning_rate": 8.02868563367035e-05,
"loss": 0.3404,
"step": 6200
},
{
"epoch": 4.06,
"learning_rate": 7.981660004702564e-05,
"loss": 0.2964,
"step": 6300
},
{
"epoch": 4.13,
"learning_rate": 7.934634375734776e-05,
"loss": 0.2458,
"step": 6400
},
{
"epoch": 4.19,
"learning_rate": 7.887608746766987e-05,
"loss": 0.2341,
"step": 6500
},
{
"epoch": 4.19,
"eval_loss": 0.16891594231128693,
"eval_runtime": 553.369,
"eval_samples_per_second": 9.825,
"eval_steps_per_second": 0.614,
"eval_wer": 0.1470775687097375,
"step": 6500
},
{
"epoch": 4.26,
"learning_rate": 7.840583117799201e-05,
"loss": 0.2353,
"step": 6600
},
{
"epoch": 4.32,
"learning_rate": 7.794027745121092e-05,
"loss": 0.2433,
"step": 6700
},
{
"epoch": 4.38,
"learning_rate": 7.747002116153303e-05,
"loss": 0.2467,
"step": 6800
},
{
"epoch": 4.45,
"learning_rate": 7.699976487185517e-05,
"loss": 0.2423,
"step": 6900
},
{
"epoch": 4.51,
"learning_rate": 7.652950858217729e-05,
"loss": 0.2316,
"step": 7000
},
{
"epoch": 4.51,
"eval_loss": 0.1705636978149414,
"eval_runtime": 555.4457,
"eval_samples_per_second": 9.789,
"eval_steps_per_second": 0.612,
"eval_wer": 0.14053751948777785,
"step": 7000
},
{
"epoch": 4.58,
"learning_rate": 7.605925229249942e-05,
"loss": 0.2239,
"step": 7100
},
{
"epoch": 4.64,
"learning_rate": 7.558899600282154e-05,
"loss": 0.2305,
"step": 7200
},
{
"epoch": 4.71,
"learning_rate": 7.511873971314367e-05,
"loss": 0.2291,
"step": 7300
},
{
"epoch": 4.77,
"learning_rate": 7.464848342346579e-05,
"loss": 0.2411,
"step": 7400
},
{
"epoch": 4.84,
"learning_rate": 7.417822713378791e-05,
"loss": 0.2383,
"step": 7500
},
{
"epoch": 4.84,
"eval_loss": 0.16366632282733917,
"eval_runtime": 555.2596,
"eval_samples_per_second": 9.792,
"eval_steps_per_second": 0.612,
"eval_wer": 0.1426064106209615,
"step": 7500
},
{
"epoch": 4.9,
"learning_rate": 7.370797084411004e-05,
"loss": 0.2328,
"step": 7600
},
{
"epoch": 4.96,
"learning_rate": 7.323771455443218e-05,
"loss": 0.2575,
"step": 7700
},
{
"epoch": 5.03,
"learning_rate": 7.27674582647543e-05,
"loss": 0.2183,
"step": 7800
},
{
"epoch": 5.09,
"learning_rate": 7.229720197507642e-05,
"loss": 0.2254,
"step": 7900
},
{
"epoch": 5.16,
"learning_rate": 7.182694568539855e-05,
"loss": 0.2148,
"step": 8000
},
{
"epoch": 5.16,
"eval_loss": 0.1583578884601593,
"eval_runtime": 555.4598,
"eval_samples_per_second": 9.788,
"eval_steps_per_second": 0.612,
"eval_wer": 0.1347328581093669,
"step": 8000
},
{
"epoch": 5.22,
"learning_rate": 7.135668939572067e-05,
"loss": 0.2096,
"step": 8100
},
{
"epoch": 5.29,
"learning_rate": 7.08864331060428e-05,
"loss": 0.2142,
"step": 8200
},
{
"epoch": 5.35,
"learning_rate": 7.041617681636492e-05,
"loss": 0.2113,
"step": 8300
},
{
"epoch": 5.42,
"learning_rate": 6.994592052668704e-05,
"loss": 0.2177,
"step": 8400
},
{
"epoch": 5.48,
"learning_rate": 6.947566423700918e-05,
"loss": 0.2085,
"step": 8500
},
{
"epoch": 5.48,
"eval_loss": 0.16014662384986877,
"eval_runtime": 556.0462,
"eval_samples_per_second": 9.778,
"eval_steps_per_second": 0.611,
"eval_wer": 0.13866473177954053,
"step": 8500
},
{
"epoch": 5.54,
"learning_rate": 6.90054079473313e-05,
"loss": 0.2112,
"step": 8600
},
{
"epoch": 5.61,
"learning_rate": 6.853515165765343e-05,
"loss": 0.2051,
"step": 8700
},
{
"epoch": 5.67,
"learning_rate": 6.806489536797554e-05,
"loss": 0.2214,
"step": 8800
},
{
"epoch": 5.74,
"learning_rate": 6.759463907829768e-05,
"loss": 0.2155,
"step": 8900
},
{
"epoch": 5.8,
"learning_rate": 6.71243827886198e-05,
"loss": 0.2944,
"step": 9000
},
{
"epoch": 5.8,
"eval_loss": 0.1565837264060974,
"eval_runtime": 556.6846,
"eval_samples_per_second": 9.767,
"eval_steps_per_second": 0.611,
"eval_wer": 0.1293596242658378,
"step": 9000
},
{
"epoch": 5.87,
"learning_rate": 6.665412649894193e-05,
"loss": 0.2048,
"step": 9100
},
{
"epoch": 5.93,
"learning_rate": 6.618387020926405e-05,
"loss": 0.2089,
"step": 9200
},
{
"epoch": 6.0,
"learning_rate": 6.571831648248296e-05,
"loss": 0.1928,
"step": 9300
},
{
"epoch": 6.06,
"learning_rate": 6.524806019280508e-05,
"loss": 0.2435,
"step": 9400
},
{
"epoch": 6.13,
"learning_rate": 6.47778039031272e-05,
"loss": 0.1944,
"step": 9500
},
{
"epoch": 6.13,
"eval_loss": 0.14938245713710785,
"eval_runtime": 556.2024,
"eval_samples_per_second": 9.775,
"eval_steps_per_second": 0.611,
"eval_wer": 0.12710443487895517,
"step": 9500
},
{
"epoch": 6.19,
"learning_rate": 6.430754761344934e-05,
"loss": 0.1952,
"step": 9600
},
{
"epoch": 6.25,
"learning_rate": 6.383729132377145e-05,
"loss": 0.1923,
"step": 9700
},
{
"epoch": 6.32,
"learning_rate": 6.336703503409358e-05,
"loss": 0.1796,
"step": 9800
},
{
"epoch": 6.38,
"learning_rate": 6.28967787444157e-05,
"loss": 0.1851,
"step": 9900
},
{
"epoch": 6.45,
"learning_rate": 6.242652245473784e-05,
"loss": 0.1853,
"step": 10000
},
{
"epoch": 6.45,
"eval_loss": 0.1561189442873001,
"eval_runtime": 554.459,
"eval_samples_per_second": 9.806,
"eval_steps_per_second": 0.613,
"eval_wer": 0.12469236275211547,
"step": 10000
},
{
"epoch": 6.51,
"learning_rate": 6.195626616505996e-05,
"loss": 0.1918,
"step": 10100
},
{
"epoch": 6.58,
"learning_rate": 6.148600987538209e-05,
"loss": 0.1743,
"step": 10200
},
{
"epoch": 6.64,
"learning_rate": 6.101575358570422e-05,
"loss": 0.1967,
"step": 10300
},
{
"epoch": 6.71,
"learning_rate": 6.054549729602633e-05,
"loss": 0.1922,
"step": 10400
},
{
"epoch": 6.77,
"learning_rate": 6.0079943569245244e-05,
"loss": 0.235,
"step": 10500
},
{
"epoch": 6.77,
"eval_loss": 0.14610491693019867,
"eval_runtime": 555.3804,
"eval_samples_per_second": 9.79,
"eval_steps_per_second": 0.612,
"eval_wer": 0.1214958769254905,
"step": 10500
},
{
"epoch": 6.83,
"learning_rate": 5.9609687279567375e-05,
"loss": 0.1934,
"step": 10600
},
{
"epoch": 6.9,
"learning_rate": 5.9139430989889486e-05,
"loss": 0.1913,
"step": 10700
},
{
"epoch": 6.96,
"learning_rate": 5.866917470021162e-05,
"loss": 0.1801,
"step": 10800
},
{
"epoch": 7.03,
"learning_rate": 5.819891841053374e-05,
"loss": 0.1829,
"step": 10900
},
{
"epoch": 7.09,
"learning_rate": 5.772866212085587e-05,
"loss": 0.2286,
"step": 11000
},
{
"epoch": 7.09,
"eval_loss": 0.1447422057390213,
"eval_runtime": 557.5344,
"eval_samples_per_second": 9.752,
"eval_steps_per_second": 0.61,
"eval_wer": 0.11670114818555306,
"step": 11000
},
{
"epoch": 7.16,
"learning_rate": 5.7258405831177996e-05,
"loss": 0.1764,
"step": 11100
},
{
"epoch": 7.22,
"learning_rate": 5.6788149541500127e-05,
"loss": 0.1775,
"step": 11200
},
{
"epoch": 7.29,
"learning_rate": 5.631789325182225e-05,
"loss": 0.1602,
"step": 11300
},
{
"epoch": 7.35,
"learning_rate": 5.584763696214437e-05,
"loss": 0.1748,
"step": 11400
},
{
"epoch": 7.41,
"learning_rate": 5.537738067246649e-05,
"loss": 0.1781,
"step": 11500
},
{
"epoch": 7.41,
"eval_loss": 0.15018154680728912,
"eval_runtime": 554.8903,
"eval_samples_per_second": 9.798,
"eval_steps_per_second": 0.613,
"eval_wer": 0.11993685469716729,
"step": 11500
},
{
"epoch": 7.48,
"learning_rate": 5.490712438278862e-05,
"loss": 0.1682,
"step": 11600
},
{
"epoch": 7.54,
"learning_rate": 5.443686809311075e-05,
"loss": 0.1706,
"step": 11700
},
{
"epoch": 7.61,
"learning_rate": 5.397131436632965e-05,
"loss": 0.1554,
"step": 11800
},
{
"epoch": 7.67,
"learning_rate": 5.350105807665178e-05,
"loss": 0.1612,
"step": 11900
},
{
"epoch": 7.74,
"learning_rate": 5.30308017869739e-05,
"loss": 0.1714,
"step": 12000
},
{
"epoch": 7.74,
"eval_loss": 0.14246602356433868,
"eval_runtime": 557.0268,
"eval_samples_per_second": 9.761,
"eval_steps_per_second": 0.61,
"eval_wer": 0.11787776873523097,
"step": 12000
},
{
"epoch": 7.8,
"learning_rate": 5.2560545497296034e-05,
"loss": 0.1719,
"step": 12100
},
{
"epoch": 7.87,
"learning_rate": 5.209028920761816e-05,
"loss": 0.1663,
"step": 12200
},
{
"epoch": 7.93,
"learning_rate": 5.1620032917940275e-05,
"loss": 0.1597,
"step": 12300
},
{
"epoch": 7.99,
"learning_rate": 5.11497766282624e-05,
"loss": 0.1622,
"step": 12400
},
{
"epoch": 8.06,
"learning_rate": 5.067952033858453e-05,
"loss": 0.1725,
"step": 12500
},
{
"epoch": 8.06,
"eval_loss": 0.1426704078912735,
"eval_runtime": 554.3005,
"eval_samples_per_second": 9.809,
"eval_steps_per_second": 0.613,
"eval_wer": 0.11733848431662859,
"step": 12500
},
{
"epoch": 8.12,
"learning_rate": 5.0209264048906654e-05,
"loss": 0.1545,
"step": 12600
},
{
"epoch": 8.19,
"learning_rate": 4.9739007759228785e-05,
"loss": 0.1531,
"step": 12700
},
{
"epoch": 8.25,
"learning_rate": 4.92687514695509e-05,
"loss": 0.1546,
"step": 12800
},
{
"epoch": 8.32,
"learning_rate": 4.8798495179873033e-05,
"loss": 0.151,
"step": 12900
},
{
"epoch": 8.38,
"learning_rate": 4.832823889019516e-05,
"loss": 0.143,
"step": 13000
},
{
"epoch": 8.38,
"eval_loss": 0.1447545289993286,
"eval_runtime": 556.1274,
"eval_samples_per_second": 9.777,
"eval_steps_per_second": 0.611,
"eval_wer": 0.11421063468873484,
"step": 13000
},
{
"epoch": 8.45,
"learning_rate": 4.785798260051728e-05,
"loss": 0.1568,
"step": 13100
},
{
"epoch": 8.51,
"learning_rate": 4.7387726310839406e-05,
"loss": 0.1565,
"step": 13200
},
{
"epoch": 8.57,
"learning_rate": 4.6917470021161537e-05,
"loss": 0.1515,
"step": 13300
},
{
"epoch": 8.64,
"learning_rate": 4.644721373148366e-05,
"loss": 0.2059,
"step": 13400
},
{
"epoch": 8.7,
"learning_rate": 4.5976957441805785e-05,
"loss": 0.154,
"step": 13500
},
{
"epoch": 8.7,
"eval_loss": 0.13924145698547363,
"eval_runtime": 554.6978,
"eval_samples_per_second": 9.802,
"eval_steps_per_second": 0.613,
"eval_wer": 0.11040622824477629,
"step": 13500
},
{
"epoch": 8.77,
"learning_rate": 4.550670115212791e-05,
"loss": 0.1606,
"step": 13600
},
{
"epoch": 8.83,
"learning_rate": 4.504114742534681e-05,
"loss": 0.1532,
"step": 13700
},
{
"epoch": 8.9,
"learning_rate": 4.457089113566894e-05,
"loss": 0.1509,
"step": 13800
},
{
"epoch": 8.96,
"learning_rate": 4.4100634845991065e-05,
"loss": 0.1538,
"step": 13900
},
{
"epoch": 9.03,
"learning_rate": 4.3630378556313196e-05,
"loss": 0.1447,
"step": 14000
},
{
"epoch": 9.03,
"eval_loss": 0.1403558999300003,
"eval_runtime": 554.9504,
"eval_samples_per_second": 9.797,
"eval_steps_per_second": 0.613,
"eval_wer": 0.1094159059487974,
"step": 14000
},
{
"epoch": 9.09,
"learning_rate": 4.316012226663532e-05,
"loss": 0.1445,
"step": 14100
},
{
"epoch": 9.16,
"learning_rate": 4.2689865976957444e-05,
"loss": 0.1641,
"step": 14200
},
{
"epoch": 9.22,
"learning_rate": 4.221960968727957e-05,
"loss": 0.1448,
"step": 14300
},
{
"epoch": 9.28,
"learning_rate": 4.174935339760169e-05,
"loss": 0.1386,
"step": 14400
},
{
"epoch": 9.35,
"learning_rate": 4.127909710792382e-05,
"loss": 0.1471,
"step": 14500
},
{
"epoch": 9.35,
"eval_loss": 0.14039234817028046,
"eval_runtime": 559.6371,
"eval_samples_per_second": 9.715,
"eval_steps_per_second": 0.608,
"eval_wer": 0.10880798533146381,
"step": 14500
},
{
"epoch": 9.41,
"learning_rate": 4.080884081824595e-05,
"loss": 0.1514,
"step": 14600
},
{
"epoch": 9.48,
"learning_rate": 4.033858452856807e-05,
"loss": 0.148,
"step": 14700
},
{
"epoch": 9.54,
"learning_rate": 3.9868328238890195e-05,
"loss": 0.1463,
"step": 14800
},
{
"epoch": 9.61,
"learning_rate": 3.9398071949212326e-05,
"loss": 0.1412,
"step": 14900
},
{
"epoch": 9.67,
"learning_rate": 3.892781565953445e-05,
"loss": 0.1479,
"step": 15000
},
{
"epoch": 9.67,
"eval_loss": 0.14136268198490143,
"eval_runtime": 556.7019,
"eval_samples_per_second": 9.766,
"eval_steps_per_second": 0.611,
"eval_wer": 0.11328894859148715,
"step": 15000
},
{
"epoch": 9.74,
"learning_rate": 3.8457559369856574e-05,
"loss": 0.1541,
"step": 15100
},
{
"epoch": 9.8,
"learning_rate": 3.79873030801787e-05,
"loss": 0.1482,
"step": 15200
},
{
"epoch": 9.86,
"learning_rate": 3.751704679050083e-05,
"loss": 0.1462,
"step": 15300
},
{
"epoch": 9.93,
"learning_rate": 3.705149306371973e-05,
"loss": 0.1513,
"step": 15400
},
{
"epoch": 9.99,
"learning_rate": 3.6581236774041855e-05,
"loss": 0.1607,
"step": 15500
},
{
"epoch": 9.99,
"eval_loss": 0.14584468305110931,
"eval_runtime": 555.5836,
"eval_samples_per_second": 9.786,
"eval_steps_per_second": 0.612,
"eval_wer": 0.11712277054918764,
"step": 15500
},
{
"epoch": 10.06,
"learning_rate": 3.611098048436398e-05,
"loss": 0.1589,
"step": 15600
},
{
"epoch": 10.12,
"learning_rate": 3.56407241946861e-05,
"loss": 0.1588,
"step": 15700
},
{
"epoch": 10.19,
"learning_rate": 3.5170467905008234e-05,
"loss": 0.1646,
"step": 15800
},
{
"epoch": 10.25,
"learning_rate": 3.470021161533036e-05,
"loss": 0.17,
"step": 15900
},
{
"epoch": 10.32,
"learning_rate": 3.422995532565248e-05,
"loss": 0.166,
"step": 16000
},
{
"epoch": 10.32,
"eval_loss": 0.16516457498073578,
"eval_runtime": 557.4495,
"eval_samples_per_second": 9.753,
"eval_steps_per_second": 0.61,
"eval_wer": 0.12636904703540647,
"step": 16000
},
{
"epoch": 10.38,
"learning_rate": 3.3759699035974606e-05,
"loss": 0.1784,
"step": 16100
},
{
"epoch": 10.44,
"learning_rate": 3.328944274629674e-05,
"loss": 0.1678,
"step": 16200
},
{
"epoch": 10.51,
"learning_rate": 3.281918645661886e-05,
"loss": 0.1771,
"step": 16300
},
{
"epoch": 10.57,
"learning_rate": 3.235363272983776e-05,
"loss": 0.1885,
"step": 16400
},
{
"epoch": 10.64,
"learning_rate": 3.188337644015989e-05,
"loss": 0.188,
"step": 16500
},
{
"epoch": 10.64,
"eval_loss": 0.17125311493873596,
"eval_runtime": 557.8968,
"eval_samples_per_second": 9.746,
"eval_steps_per_second": 0.609,
"eval_wer": 0.13218351358506478,
"step": 16500
},
{
"epoch": 10.7,
"learning_rate": 3.141312015048201e-05,
"loss": 0.1847,
"step": 16600
},
{
"epoch": 10.77,
"learning_rate": 3.094286386080414e-05,
"loss": 0.2217,
"step": 16700
},
{
"epoch": 10.83,
"learning_rate": 3.0472607571126265e-05,
"loss": 0.1662,
"step": 16800
},
{
"epoch": 10.9,
"learning_rate": 3.0002351281448393e-05,
"loss": 0.1461,
"step": 16900
},
{
"epoch": 10.96,
"learning_rate": 2.9532094991770514e-05,
"loss": 0.1461,
"step": 17000
},
{
"epoch": 10.96,
"eval_loss": 0.14227142930030823,
"eval_runtime": 556.6368,
"eval_samples_per_second": 9.768,
"eval_steps_per_second": 0.611,
"eval_wer": 0.11110239540333572,
"step": 17000
},
{
"epoch": 11.03,
"learning_rate": 2.906183870209264e-05,
"loss": 0.1583,
"step": 17100
},
{
"epoch": 11.09,
"learning_rate": 2.859158241241477e-05,
"loss": 0.1221,
"step": 17200
},
{
"epoch": 11.15,
"learning_rate": 2.812132612273689e-05,
"loss": 0.138,
"step": 17300
},
{
"epoch": 11.22,
"learning_rate": 2.7651069833059017e-05,
"loss": 0.1269,
"step": 17400
},
{
"epoch": 11.28,
"learning_rate": 2.7180813543381144e-05,
"loss": 0.1289,
"step": 17500
},
{
"epoch": 11.28,
"eval_loss": 0.13876527547836304,
"eval_runtime": 555.2154,
"eval_samples_per_second": 9.793,
"eval_steps_per_second": 0.612,
"eval_wer": 0.10974928177120613,
"step": 17500
},
{
"epoch": 11.35,
"learning_rate": 2.671055725370327e-05,
"loss": 0.1395,
"step": 17600
},
{
"epoch": 11.41,
"learning_rate": 2.6240300964025392e-05,
"loss": 0.1397,
"step": 17700
},
{
"epoch": 11.48,
"learning_rate": 2.577004467434752e-05,
"loss": 0.123,
"step": 17800
},
{
"epoch": 11.54,
"learning_rate": 2.5299788384669647e-05,
"loss": 0.1486,
"step": 17900
},
{
"epoch": 11.61,
"learning_rate": 2.482953209499177e-05,
"loss": 0.1273,
"step": 18000
},
{
"epoch": 11.61,
"eval_loss": 0.1438097506761551,
"eval_runtime": 554.8582,
"eval_samples_per_second": 9.799,
"eval_steps_per_second": 0.613,
"eval_wer": 0.10742545618559228,
"step": 18000
},
{
"epoch": 11.67,
"learning_rate": 2.4363978368210676e-05,
"loss": 0.1313,
"step": 18100
},
{
"epoch": 11.73,
"learning_rate": 2.38937220785328e-05,
"loss": 0.1365,
"step": 18200
},
{
"epoch": 11.8,
"learning_rate": 2.3423465788854924e-05,
"loss": 0.1335,
"step": 18300
},
{
"epoch": 11.86,
"learning_rate": 2.295320949917705e-05,
"loss": 0.1321,
"step": 18400
},
{
"epoch": 11.93,
"learning_rate": 2.2482953209499176e-05,
"loss": 0.1317,
"step": 18500
},
{
"epoch": 11.93,
"eval_loss": 0.13120408356189728,
"eval_runtime": 555.4622,
"eval_samples_per_second": 9.788,
"eval_steps_per_second": 0.612,
"eval_wer": 0.10656260111582849,
"step": 18500
},
{
"epoch": 11.99,
"learning_rate": 2.2012696919821303e-05,
"loss": 0.137,
"step": 18600
},
{
"epoch": 12.06,
"learning_rate": 2.154244063014343e-05,
"loss": 0.1224,
"step": 18700
},
{
"epoch": 12.12,
"learning_rate": 2.1072184340465555e-05,
"loss": 0.1293,
"step": 18800
},
{
"epoch": 12.19,
"learning_rate": 2.0606630613684456e-05,
"loss": 0.1245,
"step": 18900
},
{
"epoch": 12.25,
"learning_rate": 2.0136374324006584e-05,
"loss": 0.1448,
"step": 19000
},
{
"epoch": 12.25,
"eval_loss": 0.1446371227502823,
"eval_runtime": 558.424,
"eval_samples_per_second": 9.736,
"eval_steps_per_second": 0.609,
"eval_wer": 0.10419955484522538,
"step": 19000
},
{
"epoch": 12.31,
"learning_rate": 1.9666118034328708e-05,
"loss": 0.1472,
"step": 19100
},
{
"epoch": 12.38,
"learning_rate": 1.9195861744650835e-05,
"loss": 0.15,
"step": 19200
},
{
"epoch": 12.44,
"learning_rate": 1.8725605454972963e-05,
"loss": 0.1434,
"step": 19300
},
{
"epoch": 12.51,
"learning_rate": 1.8255349165295087e-05,
"loss": 0.1439,
"step": 19400
},
{
"epoch": 12.57,
"learning_rate": 1.7785092875617214e-05,
"loss": 0.1424,
"step": 19500
},
{
"epoch": 12.57,
"eval_loss": 0.13855019211769104,
"eval_runtime": 555.0962,
"eval_samples_per_second": 9.795,
"eval_steps_per_second": 0.613,
"eval_wer": 0.10152274309470814,
"step": 19500
},
{
"epoch": 12.64,
"learning_rate": 1.7314836585939338e-05,
"loss": 0.1422,
"step": 19600
},
{
"epoch": 12.7,
"learning_rate": 1.6844580296261466e-05,
"loss": 0.1426,
"step": 19700
},
{
"epoch": 12.77,
"learning_rate": 1.637432400658359e-05,
"loss": 0.1395,
"step": 19800
},
{
"epoch": 12.83,
"learning_rate": 1.5904067716905717e-05,
"loss": 0.1363,
"step": 19900
},
{
"epoch": 12.89,
"learning_rate": 1.543381142722784e-05,
"loss": 0.1392,
"step": 20000
},
{
"epoch": 12.89,
"eval_loss": 0.13786287605762482,
"eval_runtime": 553.5953,
"eval_samples_per_second": 9.821,
"eval_steps_per_second": 0.614,
"eval_wer": 0.10046378459999804,
"step": 20000
},
{
"epoch": 12.96,
"learning_rate": 1.4963555137549964e-05,
"loss": 0.1387,
"step": 20100
},
{
"epoch": 13.02,
"learning_rate": 1.4493298847872091e-05,
"loss": 0.1388,
"step": 20200
},
{
"epoch": 13.09,
"learning_rate": 1.4023042558194215e-05,
"loss": 0.1259,
"step": 20300
},
{
"epoch": 13.15,
"learning_rate": 1.3552786268516343e-05,
"loss": 0.1475,
"step": 20400
},
{
"epoch": 13.22,
"learning_rate": 1.3082529978838467e-05,
"loss": 0.1408,
"step": 20500
},
{
"epoch": 13.22,
"eval_loss": 0.1407657265663147,
"eval_runtime": 557.4847,
"eval_samples_per_second": 9.753,
"eval_steps_per_second": 0.61,
"eval_wer": 0.09921852785158893,
"step": 20500
},
{
"epoch": 13.28,
"learning_rate": 1.2612273689160592e-05,
"loss": 0.1444,
"step": 20600
},
{
"epoch": 13.35,
"learning_rate": 1.214201739948272e-05,
"loss": 0.1324,
"step": 20700
},
{
"epoch": 13.41,
"learning_rate": 1.1671761109804846e-05,
"loss": 0.1397,
"step": 20800
},
{
"epoch": 13.48,
"learning_rate": 1.120150482012697e-05,
"loss": 0.1382,
"step": 20900
},
{
"epoch": 13.54,
"learning_rate": 1.0731248530449096e-05,
"loss": 0.1239,
"step": 21000
},
{
"epoch": 13.54,
"eval_loss": 0.13379834592342377,
"eval_runtime": 553.3736,
"eval_samples_per_second": 9.825,
"eval_steps_per_second": 0.614,
"eval_wer": 0.09677704021100729,
"step": 21000
},
{
"epoch": 13.6,
"learning_rate": 1.0260992240771221e-05,
"loss": 0.122,
"step": 21100
},
{
"epoch": 13.67,
"learning_rate": 9.790735951093347e-06,
"loss": 0.1209,
"step": 21200
},
{
"epoch": 13.73,
"learning_rate": 9.320479661415473e-06,
"loss": 0.1248,
"step": 21300
},
{
"epoch": 13.8,
"learning_rate": 8.850223371737597e-06,
"loss": 0.1198,
"step": 21400
},
{
"epoch": 13.86,
"learning_rate": 8.379967082059723e-06,
"loss": 0.1244,
"step": 21500
},
{
"epoch": 13.86,
"eval_loss": 0.1335345059633255,
"eval_runtime": 554.5112,
"eval_samples_per_second": 9.805,
"eval_steps_per_second": 0.613,
"eval_wer": 0.09565925068881329,
"step": 21500
},
{
"epoch": 13.93,
"learning_rate": 7.914413355278628e-06,
"loss": 0.1235,
"step": 21600
},
{
"epoch": 13.99,
"learning_rate": 7.444157065600752e-06,
"loss": 0.1367,
"step": 21700
},
{
"epoch": 14.06,
"learning_rate": 6.973900775922878e-06,
"loss": 0.1288,
"step": 21800
},
{
"epoch": 14.12,
"learning_rate": 6.503644486245005e-06,
"loss": 0.1358,
"step": 21900
},
{
"epoch": 14.18,
"learning_rate": 6.03338819656713e-06,
"loss": 0.1254,
"step": 22000
},
{
"epoch": 14.18,
"eval_loss": 0.13817058503627777,
"eval_runtime": 555.1553,
"eval_samples_per_second": 9.794,
"eval_steps_per_second": 0.612,
"eval_wer": 0.09503171972898507,
"step": 22000
},
{
"epoch": 14.25,
"learning_rate": 5.5631319068892555e-06,
"loss": 0.1343,
"step": 22100
},
{
"epoch": 14.31,
"learning_rate": 5.097578180108159e-06,
"loss": 0.131,
"step": 22200
},
{
"epoch": 14.38,
"learning_rate": 4.627321890430284e-06,
"loss": 0.1503,
"step": 22300
},
{
"epoch": 14.44,
"learning_rate": 4.15706560075241e-06,
"loss": 0.1543,
"step": 22400
},
{
"epoch": 14.51,
"learning_rate": 3.6868093110745355e-06,
"loss": 0.1597,
"step": 22500
},
{
"epoch": 14.51,
"eval_loss": 0.15444068610668182,
"eval_runtime": 553.9922,
"eval_samples_per_second": 9.814,
"eval_steps_per_second": 0.614,
"eval_wer": 0.09704177983468482,
"step": 22500
},
{
"epoch": 14.57,
"learning_rate": 3.2165530213966613e-06,
"loss": 0.1588,
"step": 22600
},
{
"epoch": 14.64,
"learning_rate": 2.746296731718787e-06,
"loss": 0.1616,
"step": 22700
},
{
"epoch": 14.7,
"learning_rate": 2.2760404420409124e-06,
"loss": 0.1701,
"step": 22800
},
{
"epoch": 14.76,
"learning_rate": 1.8057841523630381e-06,
"loss": 0.1569,
"step": 22900
},
{
"epoch": 14.83,
"learning_rate": 1.3355278626851635e-06,
"loss": 0.1566,
"step": 23000
},
{
"epoch": 14.83,
"eval_loss": 0.1588866114616394,
"eval_runtime": 554.1492,
"eval_samples_per_second": 9.811,
"eval_steps_per_second": 0.614,
"eval_wer": 0.09631619716238345,
"step": 23000
},
{
"epoch": 14.89,
"learning_rate": 8.652715730072889e-07,
"loss": 0.1645,
"step": 23100
},
{
"epoch": 14.96,
"learning_rate": 3.950152833294146e-07,
"loss": 0.1684,
"step": 23200
},
{
"epoch": 15.0,
"step": 23265,
"total_flos": 4.936819874574462e+20,
"train_loss": 0.2744614067626927,
"train_runtime": 128452.941,
"train_samples_per_second": 5.797,
"train_steps_per_second": 0.181
}
],
"max_steps": 23265,
"num_train_epochs": 15,
"total_flos": 4.936819874574462e+20,
"trial_name": null,
"trial_params": null
}