xls-r-300m-pt / trainer_state.json
AlexN's picture
End of training
6db3b99
{
"best_metric": 0.22902172803878784,
"best_model_checkpoint": "./checkpoint-11500",
"epoch": 15.0,
"global_step": 11670,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13,
"learning_rate": 1.2933333333333334e-05,
"loss": 14.1317,
"step": 100
},
{
"epoch": 0.26,
"learning_rate": 2.6266666666666667e-05,
"loss": 5.2635,
"step": 200
},
{
"epoch": 0.39,
"learning_rate": 3.960000000000001e-05,
"loss": 3.8268,
"step": 300
},
{
"epoch": 0.51,
"learning_rate": 5.293333333333334e-05,
"loss": 3.2266,
"step": 400
},
{
"epoch": 0.64,
"learning_rate": 6.626666666666666e-05,
"loss": 3.0952,
"step": 500
},
{
"epoch": 0.64,
"eval_loss": 3.0981762409210205,
"eval_runtime": 326.629,
"eval_samples_per_second": 25.414,
"eval_steps_per_second": 0.796,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 0.77,
"learning_rate": 7.960000000000001e-05,
"loss": 3.0583,
"step": 600
},
{
"epoch": 0.9,
"learning_rate": 9.293333333333333e-05,
"loss": 3.0349,
"step": 700
},
{
"epoch": 1.03,
"learning_rate": 0.00010626666666666667,
"loss": 2.9355,
"step": 800
},
{
"epoch": 1.16,
"learning_rate": 0.00011960000000000001,
"loss": 2.3372,
"step": 900
},
{
"epoch": 1.29,
"learning_rate": 0.00013293333333333333,
"loss": 1.7975,
"step": 1000
},
{
"epoch": 1.29,
"eval_loss": 0.7887413501739502,
"eval_runtime": 341.1915,
"eval_samples_per_second": 24.329,
"eval_steps_per_second": 0.762,
"eval_wer": 0.5651080072872386,
"step": 1000
},
{
"epoch": 1.41,
"learning_rate": 0.00014626666666666665,
"loss": 1.6159,
"step": 1100
},
{
"epoch": 1.54,
"learning_rate": 0.0001596,
"loss": 1.5287,
"step": 1200
},
{
"epoch": 1.67,
"learning_rate": 0.00017293333333333335,
"loss": 1.4876,
"step": 1300
},
{
"epoch": 1.8,
"learning_rate": 0.00018626666666666668,
"loss": 1.4606,
"step": 1400
},
{
"epoch": 1.93,
"learning_rate": 0.0001996,
"loss": 1.4138,
"step": 1500
},
{
"epoch": 1.93,
"eval_loss": 0.523814857006073,
"eval_runtime": 341.7084,
"eval_samples_per_second": 24.293,
"eval_steps_per_second": 0.761,
"eval_wer": 0.43889997397414765,
"step": 1500
},
{
"epoch": 2.06,
"learning_rate": 0.00019809242871189774,
"loss": 1.4258,
"step": 1600
},
{
"epoch": 2.19,
"learning_rate": 0.000196125860373648,
"loss": 1.3778,
"step": 1700
},
{
"epoch": 2.31,
"learning_rate": 0.00019415929203539823,
"loss": 1.3518,
"step": 1800
},
{
"epoch": 2.44,
"learning_rate": 0.00019219272369714848,
"loss": 1.3692,
"step": 1900
},
{
"epoch": 2.57,
"learning_rate": 0.00019022615535889875,
"loss": 1.344,
"step": 2000
},
{
"epoch": 2.57,
"eval_loss": 0.4774917662143707,
"eval_runtime": 337.4262,
"eval_samples_per_second": 24.601,
"eval_steps_per_second": 0.771,
"eval_wer": 0.4318209421358549,
"step": 2000
},
{
"epoch": 2.7,
"learning_rate": 0.000188259587020649,
"loss": 1.3167,
"step": 2100
},
{
"epoch": 2.83,
"learning_rate": 0.00018629301868239921,
"loss": 1.3126,
"step": 2200
},
{
"epoch": 2.96,
"learning_rate": 0.00018432645034414946,
"loss": 1.3161,
"step": 2300
},
{
"epoch": 3.08,
"learning_rate": 0.0001823598820058997,
"loss": 1.2738,
"step": 2400
},
{
"epoch": 3.21,
"learning_rate": 0.00018039331366764995,
"loss": 1.2737,
"step": 2500
},
{
"epoch": 3.21,
"eval_loss": 0.46475061774253845,
"eval_runtime": 336.0925,
"eval_samples_per_second": 24.699,
"eval_steps_per_second": 0.774,
"eval_wer": 0.4074954454758393,
"step": 2500
},
{
"epoch": 3.34,
"learning_rate": 0.0001784267453294002,
"loss": 1.262,
"step": 2600
},
{
"epoch": 3.47,
"learning_rate": 0.00017646017699115044,
"loss": 1.2496,
"step": 2700
},
{
"epoch": 3.6,
"learning_rate": 0.0001744936086529007,
"loss": 1.2612,
"step": 2800
},
{
"epoch": 3.73,
"learning_rate": 0.00017252704031465093,
"loss": 1.2197,
"step": 2900
},
{
"epoch": 3.86,
"learning_rate": 0.0001705604719764012,
"loss": 1.2554,
"step": 3000
},
{
"epoch": 3.86,
"eval_loss": 0.4068518280982971,
"eval_runtime": 331.555,
"eval_samples_per_second": 25.037,
"eval_steps_per_second": 0.784,
"eval_wer": 0.36781469593129174,
"step": 3000
},
{
"epoch": 3.98,
"learning_rate": 0.00016859390363815145,
"loss": 1.2204,
"step": 3100
},
{
"epoch": 4.11,
"learning_rate": 0.0001666273352999017,
"loss": 1.2113,
"step": 3200
},
{
"epoch": 4.24,
"learning_rate": 0.00016466076696165194,
"loss": 1.2053,
"step": 3300
},
{
"epoch": 4.37,
"learning_rate": 0.00016269419862340216,
"loss": 1.2019,
"step": 3400
},
{
"epoch": 4.5,
"learning_rate": 0.0001607276302851524,
"loss": 1.1996,
"step": 3500
},
{
"epoch": 4.5,
"eval_loss": 0.3914338946342468,
"eval_runtime": 333.5366,
"eval_samples_per_second": 24.888,
"eval_steps_per_second": 0.78,
"eval_wer": 0.36679101240565626,
"step": 3500
},
{
"epoch": 4.63,
"learning_rate": 0.00015876106194690265,
"loss": 1.1903,
"step": 3600
},
{
"epoch": 4.76,
"learning_rate": 0.0001567944936086529,
"loss": 1.1866,
"step": 3700
},
{
"epoch": 4.88,
"learning_rate": 0.00015482792527040314,
"loss": 1.1678,
"step": 3800
},
{
"epoch": 5.01,
"learning_rate": 0.0001528613569321534,
"loss": 1.1854,
"step": 3900
},
{
"epoch": 5.14,
"learning_rate": 0.00015089478859390363,
"loss": 1.1427,
"step": 4000
},
{
"epoch": 5.14,
"eval_loss": 0.36938655376434326,
"eval_runtime": 326.2453,
"eval_samples_per_second": 25.444,
"eval_steps_per_second": 0.797,
"eval_wer": 0.3571614470373905,
"step": 4000
},
{
"epoch": 5.27,
"learning_rate": 0.0001489282202556539,
"loss": 1.1393,
"step": 4100
},
{
"epoch": 5.4,
"learning_rate": 0.00014696165191740415,
"loss": 1.1182,
"step": 4200
},
{
"epoch": 5.53,
"learning_rate": 0.0001449950835791544,
"loss": 1.127,
"step": 4300
},
{
"epoch": 5.66,
"learning_rate": 0.00014302851524090464,
"loss": 1.1431,
"step": 4400
},
{
"epoch": 5.78,
"learning_rate": 0.0001410619469026549,
"loss": 1.1372,
"step": 4500
},
{
"epoch": 5.78,
"eval_loss": 0.3567572236061096,
"eval_runtime": 325.458,
"eval_samples_per_second": 25.506,
"eval_steps_per_second": 0.799,
"eval_wer": 0.3500824151990978,
"step": 4500
},
{
"epoch": 5.91,
"learning_rate": 0.0001390953785644051,
"loss": 1.1226,
"step": 4600
},
{
"epoch": 6.04,
"learning_rate": 0.00013712881022615535,
"loss": 1.1019,
"step": 4700
},
{
"epoch": 6.17,
"learning_rate": 0.0001351622418879056,
"loss": 1.1031,
"step": 4800
},
{
"epoch": 6.3,
"learning_rate": 0.00013321533923303834,
"loss": 1.0882,
"step": 4900
},
{
"epoch": 6.43,
"learning_rate": 0.00013124877089478858,
"loss": 1.0831,
"step": 5000
},
{
"epoch": 6.43,
"eval_loss": 0.3331395387649536,
"eval_runtime": 327.2346,
"eval_samples_per_second": 25.367,
"eval_steps_per_second": 0.795,
"eval_wer": 0.3253058037650733,
"step": 5000
},
{
"epoch": 6.56,
"learning_rate": 0.00012928220255653886,
"loss": 1.1039,
"step": 5100
},
{
"epoch": 6.68,
"learning_rate": 0.0001273156342182891,
"loss": 1.0683,
"step": 5200
},
{
"epoch": 6.81,
"learning_rate": 0.00012534906588003935,
"loss": 1.0773,
"step": 5300
},
{
"epoch": 6.94,
"learning_rate": 0.0001233824975417896,
"loss": 1.1002,
"step": 5400
},
{
"epoch": 7.07,
"learning_rate": 0.00012141592920353984,
"loss": 1.1074,
"step": 5500
},
{
"epoch": 7.07,
"eval_loss": 0.333199679851532,
"eval_runtime": 329.8287,
"eval_samples_per_second": 25.168,
"eval_steps_per_second": 0.788,
"eval_wer": 0.3352129782250369,
"step": 5500
},
{
"epoch": 7.2,
"learning_rate": 0.00011944936086529008,
"loss": 1.0341,
"step": 5600
},
{
"epoch": 7.33,
"learning_rate": 0.00011748279252704033,
"loss": 1.0812,
"step": 5700
},
{
"epoch": 7.46,
"learning_rate": 0.00011551622418879056,
"loss": 1.0709,
"step": 5800
},
{
"epoch": 7.58,
"learning_rate": 0.00011354965585054081,
"loss": 1.0843,
"step": 5900
},
{
"epoch": 7.71,
"learning_rate": 0.00011158308751229105,
"loss": 1.0536,
"step": 6000
},
{
"epoch": 7.71,
"eval_loss": 0.3130946755409241,
"eval_runtime": 327.9895,
"eval_samples_per_second": 25.309,
"eval_steps_per_second": 0.793,
"eval_wer": 0.3151557213498742,
"step": 6000
},
{
"epoch": 7.84,
"learning_rate": 0.0001096165191740413,
"loss": 1.0239,
"step": 6100
},
{
"epoch": 7.97,
"learning_rate": 0.00010764995083579154,
"loss": 1.0383,
"step": 6200
},
{
"epoch": 8.1,
"learning_rate": 0.00010568338249754179,
"loss": 1.0157,
"step": 6300
},
{
"epoch": 8.23,
"learning_rate": 0.00010371681415929205,
"loss": 1.0128,
"step": 6400
},
{
"epoch": 8.35,
"learning_rate": 0.0001017502458210423,
"loss": 1.0248,
"step": 6500
},
{
"epoch": 8.35,
"eval_loss": 0.30239033699035645,
"eval_runtime": 328.0553,
"eval_samples_per_second": 25.304,
"eval_steps_per_second": 0.793,
"eval_wer": 0.3022642491541598,
"step": 6500
},
{
"epoch": 8.48,
"learning_rate": 9.978367748279254e-05,
"loss": 0.9989,
"step": 6600
},
{
"epoch": 8.61,
"learning_rate": 9.781710914454277e-05,
"loss": 1.0151,
"step": 6700
},
{
"epoch": 8.74,
"learning_rate": 9.585054080629302e-05,
"loss": 0.9914,
"step": 6800
},
{
"epoch": 8.87,
"learning_rate": 9.388397246804326e-05,
"loss": 0.9893,
"step": 6900
},
{
"epoch": 9.0,
"learning_rate": 9.193706981317602e-05,
"loss": 1.0075,
"step": 7000
},
{
"epoch": 9.0,
"eval_loss": 0.2947603166103363,
"eval_runtime": 326.8764,
"eval_samples_per_second": 25.395,
"eval_steps_per_second": 0.795,
"eval_wer": 0.3028368179057864,
"step": 7000
},
{
"epoch": 9.13,
"learning_rate": 8.997050147492626e-05,
"loss": 0.9851,
"step": 7100
},
{
"epoch": 9.25,
"learning_rate": 8.800393313667651e-05,
"loss": 0.973,
"step": 7200
},
{
"epoch": 9.38,
"learning_rate": 8.605703048180925e-05,
"loss": 0.9623,
"step": 7300
},
{
"epoch": 9.51,
"learning_rate": 8.40904621435595e-05,
"loss": 0.9598,
"step": 7400
},
{
"epoch": 9.64,
"learning_rate": 8.212389380530974e-05,
"loss": 0.979,
"step": 7500
},
{
"epoch": 9.64,
"eval_loss": 0.27962473034858704,
"eval_runtime": 329.4084,
"eval_samples_per_second": 25.2,
"eval_steps_per_second": 0.789,
"eval_wer": 0.2852953934241346,
"step": 7500
},
{
"epoch": 9.77,
"learning_rate": 8.015732546705999e-05,
"loss": 0.9582,
"step": 7600
},
{
"epoch": 9.9,
"learning_rate": 7.819075712881023e-05,
"loss": 0.9467,
"step": 7700
},
{
"epoch": 10.03,
"learning_rate": 7.622418879056048e-05,
"loss": 0.907,
"step": 7800
},
{
"epoch": 10.15,
"learning_rate": 7.425762045231072e-05,
"loss": 0.944,
"step": 7900
},
{
"epoch": 10.28,
"learning_rate": 7.229105211406097e-05,
"loss": 0.9594,
"step": 8000
},
{
"epoch": 10.28,
"eval_loss": 0.2719425559043884,
"eval_runtime": 329.4201,
"eval_samples_per_second": 25.199,
"eval_steps_per_second": 0.789,
"eval_wer": 0.2789103843150863,
"step": 8000
},
{
"epoch": 10.41,
"learning_rate": 7.032448377581121e-05,
"loss": 0.9559,
"step": 8100
},
{
"epoch": 10.54,
"learning_rate": 6.835791543756146e-05,
"loss": 0.9368,
"step": 8200
},
{
"epoch": 10.67,
"learning_rate": 6.63913470993117e-05,
"loss": 0.9362,
"step": 8300
},
{
"epoch": 10.8,
"learning_rate": 6.442477876106195e-05,
"loss": 0.9134,
"step": 8400
},
{
"epoch": 10.93,
"learning_rate": 6.24582104228122e-05,
"loss": 0.9172,
"step": 8500
},
{
"epoch": 10.93,
"eval_loss": 0.2620205879211426,
"eval_runtime": 325.6384,
"eval_samples_per_second": 25.491,
"eval_steps_per_second": 0.798,
"eval_wer": 0.26950637633382496,
"step": 8500
},
{
"epoch": 11.05,
"learning_rate": 6.049164208456244e-05,
"loss": 0.9179,
"step": 8600
},
{
"epoch": 11.18,
"learning_rate": 5.8525073746312686e-05,
"loss": 0.9157,
"step": 8700
},
{
"epoch": 11.31,
"learning_rate": 5.655850540806293e-05,
"loss": 0.8974,
"step": 8800
},
{
"epoch": 11.44,
"learning_rate": 5.459193706981318e-05,
"loss": 0.8994,
"step": 8900
},
{
"epoch": 11.57,
"learning_rate": 5.262536873156343e-05,
"loss": 0.9047,
"step": 9000
},
{
"epoch": 11.57,
"eval_loss": 0.25371646881103516,
"eval_runtime": 328.7713,
"eval_samples_per_second": 25.249,
"eval_steps_per_second": 0.791,
"eval_wer": 0.2596165524420925,
"step": 9000
},
{
"epoch": 11.7,
"learning_rate": 5.065880039331367e-05,
"loss": 0.8816,
"step": 9100
},
{
"epoch": 11.83,
"learning_rate": 4.869223205506391e-05,
"loss": 0.9007,
"step": 9200
},
{
"epoch": 11.95,
"learning_rate": 4.672566371681416e-05,
"loss": 0.9035,
"step": 9300
},
{
"epoch": 12.08,
"learning_rate": 4.475909537856441e-05,
"loss": 0.8705,
"step": 9400
},
{
"epoch": 12.21,
"learning_rate": 4.279252704031465e-05,
"loss": 0.8777,
"step": 9500
},
{
"epoch": 12.21,
"eval_loss": 0.24379895627498627,
"eval_runtime": 329.8391,
"eval_samples_per_second": 25.167,
"eval_steps_per_second": 0.788,
"eval_wer": 0.25250281946733755,
"step": 9500
},
{
"epoch": 12.34,
"learning_rate": 4.0825958702064895e-05,
"loss": 0.8734,
"step": 9600
},
{
"epoch": 12.47,
"learning_rate": 3.887905604719764e-05,
"loss": 0.8886,
"step": 9700
},
{
"epoch": 12.6,
"learning_rate": 3.691248770894789e-05,
"loss": 0.8626,
"step": 9800
},
{
"epoch": 12.72,
"learning_rate": 3.4945919370698134e-05,
"loss": 0.8669,
"step": 9900
},
{
"epoch": 12.85,
"learning_rate": 3.297935103244838e-05,
"loss": 0.8629,
"step": 10000
},
{
"epoch": 12.85,
"eval_loss": 0.2408979833126068,
"eval_runtime": 327.8875,
"eval_samples_per_second": 25.317,
"eval_steps_per_second": 0.793,
"eval_wer": 0.24934501604927561,
"step": 10000
},
{
"epoch": 12.98,
"learning_rate": 3.1012782694198625e-05,
"loss": 0.8489,
"step": 10100
},
{
"epoch": 13.11,
"learning_rate": 2.904621435594887e-05,
"loss": 0.8356,
"step": 10200
},
{
"epoch": 13.24,
"learning_rate": 2.7079646017699116e-05,
"loss": 0.8596,
"step": 10300
},
{
"epoch": 13.37,
"learning_rate": 2.5113077679449358e-05,
"loss": 0.8401,
"step": 10400
},
{
"epoch": 13.5,
"learning_rate": 2.3146509341199607e-05,
"loss": 0.8575,
"step": 10500
},
{
"epoch": 13.5,
"eval_loss": 0.2366442084312439,
"eval_runtime": 327.7324,
"eval_samples_per_second": 25.329,
"eval_steps_per_second": 0.793,
"eval_wer": 0.24396633989763164,
"step": 10500
},
{
"epoch": 13.62,
"learning_rate": 2.1179941002949856e-05,
"loss": 0.8343,
"step": 10600
},
{
"epoch": 13.75,
"learning_rate": 1.9213372664700098e-05,
"loss": 0.8308,
"step": 10700
},
{
"epoch": 13.88,
"learning_rate": 1.7246804326450343e-05,
"loss": 0.8431,
"step": 10800
},
{
"epoch": 14.01,
"learning_rate": 1.5280235988200592e-05,
"loss": 0.8468,
"step": 10900
},
{
"epoch": 14.14,
"learning_rate": 1.3313667649950836e-05,
"loss": 0.8361,
"step": 11000
},
{
"epoch": 14.14,
"eval_loss": 0.23166431486606598,
"eval_runtime": 329.2059,
"eval_samples_per_second": 25.215,
"eval_steps_per_second": 0.79,
"eval_wer": 0.23848356033660104,
"step": 11000
},
{
"epoch": 14.27,
"learning_rate": 1.1347099311701081e-05,
"loss": 0.824,
"step": 11100
},
{
"epoch": 14.4,
"learning_rate": 9.380530973451327e-06,
"loss": 0.8252,
"step": 11200
},
{
"epoch": 14.52,
"learning_rate": 7.4336283185840714e-06,
"loss": 0.8286,
"step": 11300
},
{
"epoch": 14.65,
"learning_rate": 5.467059980334317e-06,
"loss": 0.8077,
"step": 11400
},
{
"epoch": 14.78,
"learning_rate": 3.500491642084563e-06,
"loss": 0.8126,
"step": 11500
},
{
"epoch": 14.78,
"eval_loss": 0.22902172803878784,
"eval_runtime": 327.6124,
"eval_samples_per_second": 25.338,
"eval_steps_per_second": 0.794,
"eval_wer": 0.23818860067667216,
"step": 11500
},
{
"epoch": 14.91,
"learning_rate": 1.5339233038348083e-06,
"loss": 0.8118,
"step": 11600
},
{
"epoch": 15.0,
"step": 11670,
"total_flos": 4.014305196020058e+19,
"train_loss": 1.3307904394651542,
"train_runtime": 23640.5221,
"train_samples_per_second": 15.785,
"train_steps_per_second": 0.494
}
],
"max_steps": 11670,
"num_train_epochs": 15,
"total_flos": 4.014305196020058e+19,
"trial_name": null,
"trial_params": null
}