xls-r-300m-sv-robust / trainer_state.json
marinone94's picture
merge with 100 steps
3042312
raw history blame
No virus
27.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 99.99453551912568,
"global_step": 9100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.55,
"learning_rate": 5.384615384615385e-06,
"loss": 14.944,
"step": 50
},
{
"epoch": 1.1,
"learning_rate": 1.076923076923077e-05,
"loss": 14.3202,
"step": 100
},
{
"epoch": 1.64,
"learning_rate": 1.6263736263736265e-05,
"loss": 8.0014,
"step": 150
},
{
"epoch": 2.2,
"learning_rate": 2.175824175824176e-05,
"loss": 3.9265,
"step": 200
},
{
"epoch": 2.74,
"learning_rate": 2.7252747252747255e-05,
"loss": 3.2842,
"step": 250
},
{
"epoch": 3.3,
"learning_rate": 3.274725274725275e-05,
"loss": 3.1869,
"step": 300
},
{
"epoch": 3.84,
"learning_rate": 3.824175824175824e-05,
"loss": 3.108,
"step": 350
},
{
"epoch": 4.39,
"learning_rate": 4.3736263736263734e-05,
"loss": 3.1175,
"step": 400
},
{
"epoch": 4.94,
"learning_rate": 4.923076923076923e-05,
"loss": 3.0525,
"step": 450
},
{
"epoch": 5.49,
"learning_rate": 5.472527472527473e-05,
"loss": 3.0418,
"step": 500
},
{
"epoch": 5.49,
"eval_loss": 3.0175631046295166,
"eval_runtime": 180.0397,
"eval_samples_per_second": 26.9,
"eval_steps_per_second": 0.844,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 6.04,
"learning_rate": 6.021978021978022e-05,
"loss": 2.9976,
"step": 550
},
{
"epoch": 6.59,
"learning_rate": 6.571428571428571e-05,
"loss": 2.9326,
"step": 600
},
{
"epoch": 7.14,
"learning_rate": 7.120879120879122e-05,
"loss": 2.9006,
"step": 650
},
{
"epoch": 7.69,
"learning_rate": 7.67032967032967e-05,
"loss": 2.5847,
"step": 700
},
{
"epoch": 8.24,
"learning_rate": 8.219780219780219e-05,
"loss": 1.9329,
"step": 750
},
{
"epoch": 8.79,
"learning_rate": 8.76923076923077e-05,
"loss": 1.5005,
"step": 800
},
{
"epoch": 9.34,
"learning_rate": 9.318681318681319e-05,
"loss": 1.3603,
"step": 850
},
{
"epoch": 9.89,
"learning_rate": 9.868131868131869e-05,
"loss": 1.2882,
"step": 900
},
{
"epoch": 10.44,
"learning_rate": 0.00010417582417582417,
"loss": 1.2259,
"step": 950
},
{
"epoch": 10.98,
"learning_rate": 0.00010967032967032966,
"loss": 1.1819,
"step": 1000
},
{
"epoch": 10.98,
"eval_loss": 0.2561783790588379,
"eval_runtime": 183.3784,
"eval_samples_per_second": 26.41,
"eval_steps_per_second": 0.829,
"eval_wer": 0.21680386034629576,
"step": 1000
},
{
"epoch": 11.54,
"learning_rate": 0.00011516483516483517,
"loss": 1.1526,
"step": 1050
},
{
"epoch": 12.09,
"learning_rate": 0.00012065934065934066,
"loss": 1.1248,
"step": 1100
},
{
"epoch": 12.63,
"learning_rate": 0.00012615384615384615,
"loss": 1.0837,
"step": 1150
},
{
"epoch": 13.19,
"learning_rate": 0.00013164835164835166,
"loss": 1.0713,
"step": 1200
},
{
"epoch": 13.73,
"learning_rate": 0.00013714285714285716,
"loss": 1.0511,
"step": 1250
},
{
"epoch": 14.28,
"learning_rate": 0.00014263736263736264,
"loss": 1.0454,
"step": 1300
},
{
"epoch": 14.83,
"learning_rate": 0.00014813186813186812,
"loss": 1.0157,
"step": 1350
},
{
"epoch": 15.38,
"learning_rate": 0.00015362637362637362,
"loss": 1.0207,
"step": 1400
},
{
"epoch": 15.93,
"learning_rate": 0.00015912087912087913,
"loss": 0.9859,
"step": 1450
},
{
"epoch": 16.48,
"learning_rate": 0.0001646153846153846,
"loss": 1.0032,
"step": 1500
},
{
"epoch": 16.48,
"eval_loss": 0.1746312528848648,
"eval_runtime": 184.5587,
"eval_samples_per_second": 26.241,
"eval_steps_per_second": 0.824,
"eval_wer": 0.15461254612546124,
"step": 1500
},
{
"epoch": 17.03,
"learning_rate": 0.0001701098901098901,
"loss": 0.974,
"step": 1550
},
{
"epoch": 17.58,
"learning_rate": 0.00017560439560439562,
"loss": 0.9528,
"step": 1600
},
{
"epoch": 18.13,
"learning_rate": 0.0001810989010989011,
"loss": 0.9774,
"step": 1650
},
{
"epoch": 18.68,
"learning_rate": 0.0001865934065934066,
"loss": 0.9495,
"step": 1700
},
{
"epoch": 19.23,
"learning_rate": 0.0001920879120879121,
"loss": 0.9525,
"step": 1750
},
{
"epoch": 19.78,
"learning_rate": 0.00019758241758241759,
"loss": 0.9385,
"step": 1800
},
{
"epoch": 20.33,
"learning_rate": 0.00020307692307692306,
"loss": 0.9422,
"step": 1850
},
{
"epoch": 20.87,
"learning_rate": 0.00020857142857142857,
"loss": 0.9028,
"step": 1900
},
{
"epoch": 21.43,
"learning_rate": 0.00021406593406593407,
"loss": 0.9091,
"step": 1950
},
{
"epoch": 21.97,
"learning_rate": 0.00021956043956043955,
"loss": 0.9077,
"step": 2000
},
{
"epoch": 21.97,
"eval_loss": 0.15996481478214264,
"eval_runtime": 182.6325,
"eval_samples_per_second": 26.518,
"eval_steps_per_second": 0.832,
"eval_wer": 0.13391995458416123,
"step": 2000
},
{
"epoch": 22.52,
"learning_rate": 0.00022505494505494506,
"loss": 0.9073,
"step": 2050
},
{
"epoch": 23.08,
"learning_rate": 0.00023054945054945056,
"loss": 0.9046,
"step": 2100
},
{
"epoch": 23.62,
"learning_rate": 0.00023604395604395604,
"loss": 0.8864,
"step": 2150
},
{
"epoch": 24.17,
"learning_rate": 0.00024153846153846155,
"loss": 0.8888,
"step": 2200
},
{
"epoch": 24.72,
"learning_rate": 0.00024703296703296705,
"loss": 0.8757,
"step": 2250
},
{
"epoch": 25.27,
"learning_rate": 0.00024915750915750914,
"loss": 0.8819,
"step": 2300
},
{
"epoch": 25.82,
"learning_rate": 0.00024732600732600734,
"loss": 0.8722,
"step": 2350
},
{
"epoch": 26.37,
"learning_rate": 0.0002454945054945055,
"loss": 0.8757,
"step": 2400
},
{
"epoch": 26.92,
"learning_rate": 0.00024366300366300369,
"loss": 0.8544,
"step": 2450
},
{
"epoch": 27.47,
"learning_rate": 0.00024183150183150186,
"loss": 0.8687,
"step": 2500
},
{
"epoch": 27.47,
"eval_loss": 0.1647317260503769,
"eval_runtime": 181.9124,
"eval_samples_per_second": 26.623,
"eval_steps_per_second": 0.836,
"eval_wer": 0.1378370706783991,
"step": 2500
},
{
"epoch": 28.02,
"learning_rate": 0.00024,
"loss": 0.8661,
"step": 2550
},
{
"epoch": 28.57,
"learning_rate": 0.00023816849816849818,
"loss": 0.8187,
"step": 2600
},
{
"epoch": 29.12,
"learning_rate": 0.00023633699633699635,
"loss": 0.8425,
"step": 2650
},
{
"epoch": 29.67,
"learning_rate": 0.00023450549450549453,
"loss": 0.8455,
"step": 2700
},
{
"epoch": 30.22,
"learning_rate": 0.00023267399267399267,
"loss": 0.8369,
"step": 2750
},
{
"epoch": 30.77,
"learning_rate": 0.00023084249084249085,
"loss": 0.8253,
"step": 2800
},
{
"epoch": 31.32,
"learning_rate": 0.00022901098901098902,
"loss": 0.8213,
"step": 2850
},
{
"epoch": 31.86,
"learning_rate": 0.0002271794871794872,
"loss": 0.808,
"step": 2900
},
{
"epoch": 32.42,
"learning_rate": 0.00022534798534798534,
"loss": 0.8352,
"step": 2950
},
{
"epoch": 32.96,
"learning_rate": 0.00022351648351648352,
"loss": 0.8081,
"step": 3000
},
{
"epoch": 32.96,
"eval_loss": 0.16083544492721558,
"eval_runtime": 184.6287,
"eval_samples_per_second": 26.231,
"eval_steps_per_second": 0.823,
"eval_wer": 0.135310814646608,
"step": 3000
},
{
"epoch": 33.51,
"learning_rate": 0.0002216849816849817,
"loss": 0.8157,
"step": 3050
},
{
"epoch": 34.07,
"learning_rate": 0.00021985347985347986,
"loss": 0.8035,
"step": 3100
},
{
"epoch": 34.61,
"learning_rate": 0.00021802197802197804,
"loss": 0.7844,
"step": 3150
},
{
"epoch": 35.16,
"learning_rate": 0.00021619047619047619,
"loss": 0.7835,
"step": 3200
},
{
"epoch": 35.71,
"learning_rate": 0.00021435897435897436,
"loss": 0.7854,
"step": 3250
},
{
"epoch": 36.26,
"learning_rate": 0.00021252747252747253,
"loss": 0.7845,
"step": 3300
},
{
"epoch": 36.81,
"learning_rate": 0.0002106959706959707,
"loss": 0.7677,
"step": 3350
},
{
"epoch": 37.36,
"learning_rate": 0.00020886446886446885,
"loss": 0.7933,
"step": 3400
},
{
"epoch": 37.91,
"learning_rate": 0.00020703296703296703,
"loss": 0.7781,
"step": 3450
},
{
"epoch": 38.46,
"learning_rate": 0.0002052014652014652,
"loss": 0.7923,
"step": 3500
},
{
"epoch": 38.46,
"eval_loss": 0.15337252616882324,
"eval_runtime": 182.8057,
"eval_samples_per_second": 26.493,
"eval_steps_per_second": 0.831,
"eval_wer": 0.12773204655123474,
"step": 3500
},
{
"epoch": 39.01,
"learning_rate": 0.00020336996336996338,
"loss": 0.7861,
"step": 3550
},
{
"epoch": 39.56,
"learning_rate": 0.00020157509157509157,
"loss": 0.767,
"step": 3600
},
{
"epoch": 40.11,
"learning_rate": 0.00019974358974358974,
"loss": 0.7688,
"step": 3650
},
{
"epoch": 40.66,
"learning_rate": 0.0001979120879120879,
"loss": 0.7515,
"step": 3700
},
{
"epoch": 41.21,
"learning_rate": 0.00019608058608058606,
"loss": 0.7602,
"step": 3750
},
{
"epoch": 41.75,
"learning_rate": 0.00019424908424908423,
"loss": 0.7565,
"step": 3800
},
{
"epoch": 42.31,
"learning_rate": 0.0001924175824175824,
"loss": 0.7646,
"step": 3850
},
{
"epoch": 42.85,
"learning_rate": 0.0001905860805860806,
"loss": 0.7547,
"step": 3900
},
{
"epoch": 43.4,
"learning_rate": 0.00018875457875457878,
"loss": 0.7474,
"step": 3950
},
{
"epoch": 43.95,
"learning_rate": 0.00018692307692307693,
"loss": 0.7349,
"step": 4000
},
{
"epoch": 43.95,
"eval_loss": 0.15459321439266205,
"eval_runtime": 182.8482,
"eval_samples_per_second": 26.486,
"eval_steps_per_second": 0.831,
"eval_wer": 0.13034345728072666,
"step": 4000
},
{
"epoch": 44.5,
"learning_rate": 0.0001850915750915751,
"loss": 0.7447,
"step": 4050
},
{
"epoch": 45.05,
"learning_rate": 0.00018326007326007328,
"loss": 0.7442,
"step": 4100
},
{
"epoch": 45.6,
"learning_rate": 0.00018142857142857145,
"loss": 0.7311,
"step": 4150
},
{
"epoch": 46.15,
"learning_rate": 0.0001795970695970696,
"loss": 0.7432,
"step": 4200
},
{
"epoch": 46.7,
"learning_rate": 0.00017776556776556777,
"loss": 0.7361,
"step": 4250
},
{
"epoch": 47.25,
"learning_rate": 0.00017593406593406595,
"loss": 0.7358,
"step": 4300
},
{
"epoch": 47.8,
"learning_rate": 0.00017410256410256412,
"loss": 0.718,
"step": 4350
},
{
"epoch": 48.35,
"learning_rate": 0.00017227106227106227,
"loss": 0.7327,
"step": 4400
},
{
"epoch": 48.9,
"learning_rate": 0.00017043956043956044,
"loss": 0.7129,
"step": 4450
},
{
"epoch": 49.45,
"learning_rate": 0.00016860805860805861,
"loss": 0.7199,
"step": 4500
},
{
"epoch": 49.45,
"eval_loss": 0.16171683371067047,
"eval_runtime": 181.6699,
"eval_samples_per_second": 26.658,
"eval_steps_per_second": 0.837,
"eval_wer": 0.12770366165200114,
"step": 4500
},
{
"epoch": 49.99,
"learning_rate": 0.0001667765567765568,
"loss": 0.7156,
"step": 4550
},
{
"epoch": 50.55,
"learning_rate": 0.00016494505494505496,
"loss": 0.7265,
"step": 4600
},
{
"epoch": 51.1,
"learning_rate": 0.0001631135531135531,
"loss": 0.7169,
"step": 4650
},
{
"epoch": 51.64,
"learning_rate": 0.00016128205128205128,
"loss": 0.7022,
"step": 4700
},
{
"epoch": 52.2,
"learning_rate": 0.00015945054945054946,
"loss": 0.7182,
"step": 4750
},
{
"epoch": 52.74,
"learning_rate": 0.00015761904761904763,
"loss": 0.7239,
"step": 4800
},
{
"epoch": 53.3,
"learning_rate": 0.00015578754578754578,
"loss": 0.702,
"step": 4850
},
{
"epoch": 53.84,
"learning_rate": 0.00015395604395604395,
"loss": 0.6951,
"step": 4900
},
{
"epoch": 54.39,
"learning_rate": 0.00015212454212454213,
"loss": 0.6972,
"step": 4950
},
{
"epoch": 54.94,
"learning_rate": 0.0001502930402930403,
"loss": 0.7028,
"step": 5000
},
{
"epoch": 54.94,
"eval_loss": 0.15723808109760284,
"eval_runtime": 179.3818,
"eval_samples_per_second": 26.998,
"eval_steps_per_second": 0.847,
"eval_wer": 0.1286687482259438,
"step": 5000
},
{
"epoch": 55.49,
"learning_rate": 0.00014846153846153845,
"loss": 0.6829,
"step": 5050
},
{
"epoch": 56.04,
"learning_rate": 0.00014663003663003662,
"loss": 0.6864,
"step": 5100
},
{
"epoch": 56.59,
"learning_rate": 0.0001447985347985348,
"loss": 0.6854,
"step": 5150
},
{
"epoch": 57.14,
"learning_rate": 0.00014296703296703297,
"loss": 0.6759,
"step": 5200
},
{
"epoch": 57.69,
"learning_rate": 0.00014113553113553112,
"loss": 0.6775,
"step": 5250
},
{
"epoch": 58.24,
"learning_rate": 0.0001393040293040293,
"loss": 0.7114,
"step": 5300
},
{
"epoch": 58.79,
"learning_rate": 0.00013747252747252746,
"loss": 0.6791,
"step": 5350
},
{
"epoch": 59.34,
"learning_rate": 0.00013564102564102566,
"loss": 0.6862,
"step": 5400
},
{
"epoch": 59.89,
"learning_rate": 0.00013380952380952384,
"loss": 0.6716,
"step": 5450
},
{
"epoch": 60.44,
"learning_rate": 0.00013197802197802198,
"loss": 0.6912,
"step": 5500
},
{
"epoch": 60.44,
"eval_loss": 0.15595602989196777,
"eval_runtime": 177.0801,
"eval_samples_per_second": 27.349,
"eval_steps_per_second": 0.858,
"eval_wer": 0.12489355662787398,
"step": 5500
},
{
"epoch": 60.98,
"learning_rate": 0.00013014652014652016,
"loss": 0.6743,
"step": 5550
},
{
"epoch": 61.54,
"learning_rate": 0.00012831501831501833,
"loss": 0.6683,
"step": 5600
},
{
"epoch": 62.09,
"learning_rate": 0.0001264835164835165,
"loss": 0.6654,
"step": 5650
},
{
"epoch": 62.63,
"learning_rate": 0.00012465201465201465,
"loss": 0.6583,
"step": 5700
},
{
"epoch": 63.19,
"learning_rate": 0.00012282051282051283,
"loss": 0.6634,
"step": 5750
},
{
"epoch": 63.73,
"learning_rate": 0.000120989010989011,
"loss": 0.6566,
"step": 5800
},
{
"epoch": 64.28,
"learning_rate": 0.00011915750915750916,
"loss": 0.67,
"step": 5850
},
{
"epoch": 64.83,
"learning_rate": 0.00011732600732600734,
"loss": 0.6524,
"step": 5900
},
{
"epoch": 65.38,
"learning_rate": 0.0001154945054945055,
"loss": 0.6673,
"step": 5950
},
{
"epoch": 65.93,
"learning_rate": 0.00011366300366300367,
"loss": 0.6492,
"step": 6000
},
{
"epoch": 65.93,
"eval_loss": 0.15421651303768158,
"eval_runtime": 182.1682,
"eval_samples_per_second": 26.585,
"eval_steps_per_second": 0.834,
"eval_wer": 0.12600056769798468,
"step": 6000
},
{
"epoch": 66.48,
"learning_rate": 0.00011183150183150183,
"loss": 0.6548,
"step": 6050
},
{
"epoch": 67.03,
"learning_rate": 0.00011,
"loss": 0.6542,
"step": 6100
},
{
"epoch": 67.58,
"learning_rate": 0.00010816849816849816,
"loss": 0.6351,
"step": 6150
},
{
"epoch": 68.13,
"learning_rate": 0.00010633699633699634,
"loss": 0.6513,
"step": 6200
},
{
"epoch": 68.68,
"learning_rate": 0.00010450549450549451,
"loss": 0.6328,
"step": 6250
},
{
"epoch": 69.23,
"learning_rate": 0.00010267399267399267,
"loss": 0.6507,
"step": 6300
},
{
"epoch": 69.78,
"learning_rate": 0.00010084249084249085,
"loss": 0.6389,
"step": 6350
},
{
"epoch": 70.33,
"learning_rate": 9.901098901098901e-05,
"loss": 0.6525,
"step": 6400
},
{
"epoch": 70.87,
"learning_rate": 9.717948717948718e-05,
"loss": 0.6436,
"step": 6450
},
{
"epoch": 71.43,
"learning_rate": 9.534798534798534e-05,
"loss": 0.6407,
"step": 6500
},
{
"epoch": 71.43,
"eval_loss": 0.16047754883766174,
"eval_runtime": 178.7321,
"eval_samples_per_second": 27.096,
"eval_steps_per_second": 0.85,
"eval_wer": 0.12398523985239852,
"step": 6500
},
{
"epoch": 71.97,
"learning_rate": 9.351648351648353e-05,
"loss": 0.6274,
"step": 6550
},
{
"epoch": 72.52,
"learning_rate": 9.168498168498169e-05,
"loss": 0.6338,
"step": 6600
},
{
"epoch": 73.08,
"learning_rate": 8.989010989010989e-05,
"loss": 0.6296,
"step": 6650
},
{
"epoch": 73.62,
"learning_rate": 8.805860805860807e-05,
"loss": 0.6202,
"step": 6700
},
{
"epoch": 74.17,
"learning_rate": 8.622710622710623e-05,
"loss": 0.6332,
"step": 6750
},
{
"epoch": 74.72,
"learning_rate": 8.43956043956044e-05,
"loss": 0.623,
"step": 6800
},
{
"epoch": 75.27,
"learning_rate": 8.256410256410257e-05,
"loss": 0.6406,
"step": 6850
},
{
"epoch": 75.82,
"learning_rate": 8.073260073260073e-05,
"loss": 0.6136,
"step": 6900
},
{
"epoch": 76.37,
"learning_rate": 7.890109890109891e-05,
"loss": 0.6313,
"step": 6950
},
{
"epoch": 76.92,
"learning_rate": 7.706959706959707e-05,
"loss": 0.6222,
"step": 7000
},
{
"epoch": 76.92,
"eval_loss": 0.15765224397182465,
"eval_runtime": 176.2542,
"eval_samples_per_second": 27.477,
"eval_steps_per_second": 0.862,
"eval_wer": 0.12185637240987794,
"step": 7000
},
{
"epoch": 77.47,
"learning_rate": 7.523809523809524e-05,
"loss": 0.6149,
"step": 7050
},
{
"epoch": 78.02,
"learning_rate": 7.34065934065934e-05,
"loss": 0.6338,
"step": 7100
},
{
"epoch": 78.57,
"learning_rate": 7.157509157509158e-05,
"loss": 0.6231,
"step": 7150
},
{
"epoch": 79.12,
"learning_rate": 6.974358974358974e-05,
"loss": 0.6245,
"step": 7200
},
{
"epoch": 79.67,
"learning_rate": 6.791208791208791e-05,
"loss": 0.6022,
"step": 7250
},
{
"epoch": 80.22,
"learning_rate": 6.608058608058607e-05,
"loss": 0.6138,
"step": 7300
},
{
"epoch": 80.77,
"learning_rate": 6.424908424908426e-05,
"loss": 0.6163,
"step": 7350
},
{
"epoch": 81.32,
"learning_rate": 6.241758241758242e-05,
"loss": 0.6275,
"step": 7400
},
{
"epoch": 81.86,
"learning_rate": 6.0586080586080586e-05,
"loss": 0.6085,
"step": 7450
},
{
"epoch": 82.42,
"learning_rate": 5.8754578754578754e-05,
"loss": 0.6039,
"step": 7500
},
{
"epoch": 82.42,
"eval_loss": 0.16446340084075928,
"eval_runtime": 178.7505,
"eval_samples_per_second": 27.094,
"eval_steps_per_second": 0.85,
"eval_wer": 0.12489355662787398,
"step": 7500
},
{
"epoch": 82.96,
"learning_rate": 5.692307692307693e-05,
"loss": 0.6063,
"step": 7550
},
{
"epoch": 83.51,
"learning_rate": 5.5091575091575095e-05,
"loss": 0.6039,
"step": 7600
},
{
"epoch": 84.07,
"learning_rate": 5.326007326007326e-05,
"loss": 0.6204,
"step": 7650
},
{
"epoch": 84.61,
"learning_rate": 5.142857142857143e-05,
"loss": 0.5922,
"step": 7700
},
{
"epoch": 85.16,
"learning_rate": 4.9597069597069596e-05,
"loss": 0.6031,
"step": 7750
},
{
"epoch": 85.71,
"learning_rate": 4.776556776556776e-05,
"loss": 0.5984,
"step": 7800
},
{
"epoch": 86.26,
"learning_rate": 4.593406593406593e-05,
"loss": 0.6213,
"step": 7850
},
{
"epoch": 86.81,
"learning_rate": 4.41025641025641e-05,
"loss": 0.5927,
"step": 7900
},
{
"epoch": 87.36,
"learning_rate": 4.227106227106227e-05,
"loss": 0.598,
"step": 7950
},
{
"epoch": 87.91,
"learning_rate": 4.0439560439560445e-05,
"loss": 0.5928,
"step": 8000
},
{
"epoch": 87.91,
"eval_loss": 0.15899540483951569,
"eval_runtime": 178.7907,
"eval_samples_per_second": 27.088,
"eval_steps_per_second": 0.85,
"eval_wer": 0.12140221402214021,
"step": 8000
},
{
"epoch": 88.46,
"learning_rate": 3.860805860805861e-05,
"loss": 0.6021,
"step": 8050
},
{
"epoch": 89.01,
"learning_rate": 3.677655677655678e-05,
"loss": 0.5962,
"step": 8100
},
{
"epoch": 89.56,
"learning_rate": 3.494505494505495e-05,
"loss": 0.5798,
"step": 8150
},
{
"epoch": 90.11,
"learning_rate": 3.3113553113553114e-05,
"loss": 0.6024,
"step": 8200
},
{
"epoch": 90.66,
"learning_rate": 3.128205128205128e-05,
"loss": 0.5831,
"step": 8250
},
{
"epoch": 91.21,
"learning_rate": 2.945054945054945e-05,
"loss": 0.6001,
"step": 8300
},
{
"epoch": 91.75,
"learning_rate": 2.761904761904762e-05,
"loss": 0.5901,
"step": 8350
},
{
"epoch": 92.31,
"learning_rate": 2.578754578754579e-05,
"loss": 0.6078,
"step": 8400
},
{
"epoch": 92.85,
"learning_rate": 2.3956043956043956e-05,
"loss": 0.5853,
"step": 8450
},
{
"epoch": 93.4,
"learning_rate": 2.2124542124542124e-05,
"loss": 0.6022,
"step": 8500
},
{
"epoch": 93.4,
"eval_loss": 0.1596660017967224,
"eval_runtime": 178.6421,
"eval_samples_per_second": 27.11,
"eval_steps_per_second": 0.851,
"eval_wer": 0.1213170593244394,
"step": 8500
},
{
"epoch": 93.95,
"learning_rate": 2.029304029304029e-05,
"loss": 0.5721,
"step": 8550
},
{
"epoch": 94.5,
"learning_rate": 1.8461538461538465e-05,
"loss": 0.5839,
"step": 8600
},
{
"epoch": 95.05,
"learning_rate": 1.6630036630036632e-05,
"loss": 0.587,
"step": 8650
},
{
"epoch": 95.6,
"learning_rate": 1.4835164835164835e-05,
"loss": 0.5817,
"step": 8700
},
{
"epoch": 96.15,
"learning_rate": 1.3003663003663005e-05,
"loss": 0.5819,
"step": 8750
},
{
"epoch": 96.7,
"learning_rate": 1.1172161172161172e-05,
"loss": 0.5853,
"step": 8800
},
{
"epoch": 97.25,
"learning_rate": 9.340659340659341e-06,
"loss": 0.5778,
"step": 8850
},
{
"epoch": 97.8,
"learning_rate": 7.509157509157509e-06,
"loss": 0.6038,
"step": 8900
},
{
"epoch": 98.35,
"learning_rate": 5.677655677655678e-06,
"loss": 0.5756,
"step": 8950
},
{
"epoch": 98.9,
"learning_rate": 3.846153846153847e-06,
"loss": 0.5814,
"step": 9000
},
{
"epoch": 98.9,
"eval_loss": 0.1598692536354065,
"eval_runtime": 178.176,
"eval_samples_per_second": 27.181,
"eval_steps_per_second": 0.853,
"eval_wer": 0.11989781436275901,
"step": 9000
},
{
"epoch": 99.45,
"learning_rate": 2.0146520146520148e-06,
"loss": 0.5807,
"step": 9050
},
{
"epoch": 99.99,
"learning_rate": 1.8315018315018315e-07,
"loss": 0.5798,
"step": 9100
},
{
"epoch": 99.99,
"step": 9100,
"total_flos": 1.4065789113067918e+20,
"train_loss": 1.0890738963032818,
"train_runtime": 51321.8258,
"train_samples_per_second": 22.778,
"train_steps_per_second": 0.177
}
],
"max_steps": 9100,
"num_train_epochs": 100,
"total_flos": 1.4065789113067918e+20,
"trial_name": null,
"trial_params": null
}