wav2vec2-large-xlsr-cantonese / trainer_state.json
scottykwok's picture
First version of wav2vec2-large-xlsr finetuned using common-voice cantonese
123cdad
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 76.49968454258675,
"global_step": 40392,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5,
"learning_rate": 0.0002971500949968334,
"loss": 28.5848,
"step": 400
},
{
"epoch": 0.5,
"eval_cer": 1.0,
"eval_loss": 6.502501964569092,
"eval_runtime": 11.659,
"eval_samples_per_second": 44.344,
"eval_steps_per_second": 5.575,
"step": 400
},
{
"epoch": 1.01,
"learning_rate": 0.000293350221659278,
"loss": 6.3672,
"step": 800
},
{
"epoch": 1.01,
"eval_cer": 1.0,
"eval_loss": 6.260684490203857,
"eval_runtime": 10.5591,
"eval_samples_per_second": 48.963,
"eval_steps_per_second": 6.156,
"step": 800
},
{
"epoch": 1.51,
"learning_rate": 0.0002895503483217226,
"loss": 6.2055,
"step": 1200
},
{
"epoch": 1.51,
"eval_cer": 1.0,
"eval_loss": 6.158237934112549,
"eval_runtime": 10.6391,
"eval_samples_per_second": 48.594,
"eval_steps_per_second": 6.11,
"step": 1200
},
{
"epoch": 2.02,
"learning_rate": 0.00028575047498416716,
"loss": 5.7804,
"step": 1600
},
{
"epoch": 2.02,
"eval_cer": 0.9059449866903283,
"eval_loss": 4.536555767059326,
"eval_runtime": 10.5927,
"eval_samples_per_second": 48.807,
"eval_steps_per_second": 6.136,
"step": 1600
},
{
"epoch": 2.52,
"learning_rate": 0.00028195060164661176,
"loss": 3.9161,
"step": 2000
},
{
"epoch": 2.52,
"eval_cer": 0.8118899733806566,
"eval_loss": 3.134929895401001,
"eval_runtime": 10.6118,
"eval_samples_per_second": 48.72,
"eval_steps_per_second": 6.125,
"step": 2000
},
{
"epoch": 3.03,
"learning_rate": 0.00027815072830905636,
"loss": 3.0555,
"step": 2400
},
{
"epoch": 3.03,
"eval_cer": 0.7320319432120674,
"eval_loss": 2.483675241470337,
"eval_runtime": 10.6525,
"eval_samples_per_second": 48.533,
"eval_steps_per_second": 6.102,
"step": 2400
},
{
"epoch": 3.54,
"learning_rate": 0.00027435085497150096,
"loss": 2.4529,
"step": 2800
},
{
"epoch": 3.54,
"eval_cer": 0.6789707187222715,
"eval_loss": 2.2075281143188477,
"eval_runtime": 10.7002,
"eval_samples_per_second": 48.317,
"eval_steps_per_second": 6.075,
"step": 2800
},
{
"epoch": 4.04,
"learning_rate": 0.0002705509816339455,
"loss": 2.1371,
"step": 3200
},
{
"epoch": 4.04,
"eval_cer": 0.5902395740905058,
"eval_loss": 1.737073302268982,
"eval_runtime": 10.7189,
"eval_samples_per_second": 48.232,
"eval_steps_per_second": 6.064,
"step": 3200
},
{
"epoch": 4.55,
"learning_rate": 0.0002667511082963901,
"loss": 1.83,
"step": 3600
},
{
"epoch": 4.55,
"eval_cer": 0.5771073646850045,
"eval_loss": 1.6357048749923706,
"eval_runtime": 10.6531,
"eval_samples_per_second": 48.53,
"eval_steps_per_second": 6.102,
"step": 3600
},
{
"epoch": 5.05,
"learning_rate": 0.0002629512349588347,
"loss": 1.7147,
"step": 4000
},
{
"epoch": 5.05,
"eval_cer": 0.540550133096717,
"eval_loss": 1.4679120779037476,
"eval_runtime": 10.6485,
"eval_samples_per_second": 48.551,
"eval_steps_per_second": 6.104,
"step": 4000
},
{
"epoch": 5.56,
"learning_rate": 0.00025915136162127925,
"loss": 1.5428,
"step": 4400
},
{
"epoch": 5.56,
"eval_cer": 0.5210292812777285,
"eval_loss": 1.4024958610534668,
"eval_runtime": 11.1389,
"eval_samples_per_second": 46.414,
"eval_steps_per_second": 5.835,
"step": 4400
},
{
"epoch": 6.06,
"learning_rate": 0.00025535148828372385,
"loss": 1.4859,
"step": 4800
},
{
"epoch": 6.06,
"eval_cer": 0.5094942324755989,
"eval_loss": 1.3681739568710327,
"eval_runtime": 10.7225,
"eval_samples_per_second": 48.217,
"eval_steps_per_second": 6.062,
"step": 4800
},
{
"epoch": 6.57,
"learning_rate": 0.00025155161494616845,
"loss": 1.359,
"step": 5200
},
{
"epoch": 6.57,
"eval_cer": 0.49955634427684115,
"eval_loss": 1.3149375915527344,
"eval_runtime": 10.7188,
"eval_samples_per_second": 48.233,
"eval_steps_per_second": 6.064,
"step": 5200
},
{
"epoch": 7.07,
"learning_rate": 0.00024775174160861305,
"loss": 1.3425,
"step": 5600
},
{
"epoch": 7.07,
"eval_cer": 0.4913930789707187,
"eval_loss": 1.3069249391555786,
"eval_runtime": 10.672,
"eval_samples_per_second": 48.444,
"eval_steps_per_second": 6.091,
"step": 5600
},
{
"epoch": 7.58,
"learning_rate": 0.00024395186827105763,
"loss": 1.2121,
"step": 6000
},
{
"epoch": 7.58,
"eval_cer": 0.49032830523513754,
"eval_loss": 1.284098744392395,
"eval_runtime": 10.6314,
"eval_samples_per_second": 48.629,
"eval_steps_per_second": 6.114,
"step": 6000
},
{
"epoch": 8.08,
"learning_rate": 0.0002401519949335022,
"loss": 1.1872,
"step": 6400
},
{
"epoch": 8.08,
"eval_cer": 0.4727595385980479,
"eval_loss": 1.2425189018249512,
"eval_runtime": 10.4012,
"eval_samples_per_second": 49.706,
"eval_steps_per_second": 6.249,
"step": 6400
},
{
"epoch": 8.59,
"learning_rate": 0.00023635212159594677,
"loss": 1.0969,
"step": 6800
},
{
"epoch": 8.59,
"eval_cer": 0.47346938775510206,
"eval_loss": 1.2218185663223267,
"eval_runtime": 10.6834,
"eval_samples_per_second": 48.393,
"eval_steps_per_second": 6.084,
"step": 6800
},
{
"epoch": 9.09,
"learning_rate": 0.00023255224825839138,
"loss": 1.0807,
"step": 7200
},
{
"epoch": 9.09,
"eval_cer": 0.4603371783496007,
"eval_loss": 1.2110862731933594,
"eval_runtime": 10.7448,
"eval_samples_per_second": 48.116,
"eval_steps_per_second": 6.049,
"step": 7200
},
{
"epoch": 9.6,
"learning_rate": 0.00022875237492083595,
"loss": 0.9964,
"step": 7600
},
{
"epoch": 9.6,
"eval_cer": 0.44986690328305234,
"eval_loss": 1.1391839981079102,
"eval_runtime": 10.7437,
"eval_samples_per_second": 48.121,
"eval_steps_per_second": 6.05,
"step": 7600
},
{
"epoch": 10.1,
"learning_rate": 0.00022495250158328055,
"loss": 0.9758,
"step": 8000
},
{
"epoch": 10.1,
"eval_cer": 0.4433007985803017,
"eval_loss": 1.115509033203125,
"eval_runtime": 10.6429,
"eval_samples_per_second": 48.577,
"eval_steps_per_second": 6.107,
"step": 8000
},
{
"epoch": 10.61,
"learning_rate": 0.00022115262824572512,
"loss": 0.8896,
"step": 8400
},
{
"epoch": 10.61,
"eval_cer": 0.4456078083407276,
"eval_loss": 1.134329080581665,
"eval_runtime": 10.6834,
"eval_samples_per_second": 48.393,
"eval_steps_per_second": 6.084,
"step": 8400
},
{
"epoch": 11.11,
"learning_rate": 0.00021735275490816972,
"loss": 0.869,
"step": 8800
},
{
"epoch": 11.11,
"eval_cer": 0.4413487133984028,
"eval_loss": 1.1351521015167236,
"eval_runtime": 10.6859,
"eval_samples_per_second": 48.381,
"eval_steps_per_second": 6.083,
"step": 8800
},
{
"epoch": 11.62,
"learning_rate": 0.0002135528815706143,
"loss": 0.8204,
"step": 9200
},
{
"epoch": 11.62,
"eval_cer": 0.4431233362910382,
"eval_loss": 1.1095759868621826,
"eval_runtime": 10.6541,
"eval_samples_per_second": 48.526,
"eval_steps_per_second": 6.101,
"step": 9200
},
{
"epoch": 12.12,
"learning_rate": 0.00020975300823305887,
"loss": 0.7935,
"step": 9600
},
{
"epoch": 12.12,
"eval_cer": 0.4427684117125111,
"eval_loss": 1.1288646459579468,
"eval_runtime": 10.7946,
"eval_samples_per_second": 47.894,
"eval_steps_per_second": 6.022,
"step": 9600
},
{
"epoch": 12.63,
"learning_rate": 0.00020595313489550347,
"loss": 0.728,
"step": 10000
},
{
"epoch": 12.63,
"eval_cer": 0.4321206743566992,
"eval_loss": 1.086965799331665,
"eval_runtime": 10.816,
"eval_samples_per_second": 47.8,
"eval_steps_per_second": 6.01,
"step": 10000
},
{
"epoch": 13.13,
"learning_rate": 0.00020215326155794804,
"loss": 0.7185,
"step": 10400
},
{
"epoch": 13.13,
"eval_cer": 0.42058562555456963,
"eval_loss": 1.0575684309005737,
"eval_runtime": 10.6718,
"eval_samples_per_second": 48.446,
"eval_steps_per_second": 6.091,
"step": 10400
},
{
"epoch": 13.64,
"learning_rate": 0.00019835338822039264,
"loss": 0.6604,
"step": 10800
},
{
"epoch": 13.64,
"eval_cer": 0.4262644188110027,
"eval_loss": 1.0773364305496216,
"eval_runtime": 10.6968,
"eval_samples_per_second": 48.332,
"eval_steps_per_second": 6.077,
"step": 10800
},
{
"epoch": 14.14,
"learning_rate": 0.00019455351488283722,
"loss": 0.6319,
"step": 11200
},
{
"epoch": 14.14,
"eval_cer": 0.41543921916592724,
"eval_loss": 1.0636992454528809,
"eval_runtime": 10.6335,
"eval_samples_per_second": 48.62,
"eval_steps_per_second": 6.113,
"step": 11200
},
{
"epoch": 14.65,
"learning_rate": 0.00019075364154528182,
"loss": 0.5949,
"step": 11600
},
{
"epoch": 14.65,
"eval_cer": 0.41774622892635316,
"eval_loss": 1.0470980405807495,
"eval_runtime": 10.6966,
"eval_samples_per_second": 48.333,
"eval_steps_per_second": 6.077,
"step": 11600
},
{
"epoch": 15.15,
"learning_rate": 0.0001869537682077264,
"loss": 0.5729,
"step": 12000
},
{
"epoch": 15.15,
"eval_cer": 0.4111801242236025,
"eval_loss": 1.069692850112915,
"eval_runtime": 10.6729,
"eval_samples_per_second": 48.44,
"eval_steps_per_second": 6.09,
"step": 12000
},
{
"epoch": 15.66,
"learning_rate": 0.000183153894870171,
"loss": 0.5408,
"step": 12400
},
{
"epoch": 15.66,
"eval_cer": 0.4157941437444543,
"eval_loss": 1.042482614517212,
"eval_runtime": 10.665,
"eval_samples_per_second": 48.476,
"eval_steps_per_second": 6.095,
"step": 12400
},
{
"epoch": 16.16,
"learning_rate": 0.00017935402153261557,
"loss": 0.5246,
"step": 12800
},
{
"epoch": 16.16,
"eval_cer": 0.4085181898846495,
"eval_loss": 1.0480538606643677,
"eval_runtime": 10.7079,
"eval_samples_per_second": 48.282,
"eval_steps_per_second": 6.07,
"step": 12800
},
{
"epoch": 16.67,
"learning_rate": 0.00017555414819506014,
"loss": 0.4757,
"step": 13200
},
{
"epoch": 16.67,
"eval_cer": 0.4065661047027507,
"eval_loss": 1.0319401025772095,
"eval_runtime": 10.661,
"eval_samples_per_second": 48.494,
"eval_steps_per_second": 6.097,
"step": 13200
},
{
"epoch": 17.17,
"learning_rate": 0.00017175427485750474,
"loss": 0.4694,
"step": 13600
},
{
"epoch": 17.17,
"eval_cer": 0.402661934338953,
"eval_loss": 1.0221748352050781,
"eval_runtime": 10.6738,
"eval_samples_per_second": 48.436,
"eval_steps_per_second": 6.09,
"step": 13600
},
{
"epoch": 17.68,
"learning_rate": 0.0001679544015199493,
"loss": 0.4514,
"step": 14000
},
{
"epoch": 17.68,
"eval_cer": 0.4010647737355812,
"eval_loss": 1.0336159467697144,
"eval_runtime": 10.6673,
"eval_samples_per_second": 48.466,
"eval_steps_per_second": 6.093,
"step": 14000
},
{
"epoch": 18.18,
"learning_rate": 0.00016415452818239391,
"loss": 0.4479,
"step": 14400
},
{
"epoch": 18.18,
"eval_cer": 0.40301685891748,
"eval_loss": 1.0329766273498535,
"eval_runtime": 11.2617,
"eval_samples_per_second": 45.908,
"eval_steps_per_second": 5.772,
"step": 14400
},
{
"epoch": 18.69,
"learning_rate": 0.0001603546548448385,
"loss": 0.4206,
"step": 14800
},
{
"epoch": 18.69,
"eval_cer": 0.3953859804791482,
"eval_loss": 1.0453214645385742,
"eval_runtime": 10.4709,
"eval_samples_per_second": 49.375,
"eval_steps_per_second": 6.208,
"step": 14800
},
{
"epoch": 19.19,
"learning_rate": 0.0001565547815072831,
"loss": 0.4025,
"step": 15200
},
{
"epoch": 19.19,
"eval_cer": 0.4,
"eval_loss": 1.0425928831100464,
"eval_runtime": 10.7087,
"eval_samples_per_second": 48.278,
"eval_steps_per_second": 6.07,
"step": 15200
},
{
"epoch": 19.7,
"learning_rate": 0.00015275490816972766,
"loss": 0.368,
"step": 15600
},
{
"epoch": 19.7,
"eval_cer": 0.391659272404614,
"eval_loss": 1.0207164287567139,
"eval_runtime": 10.853,
"eval_samples_per_second": 47.637,
"eval_steps_per_second": 5.989,
"step": 15600
},
{
"epoch": 20.2,
"learning_rate": 0.00014895503483217226,
"loss": 0.3652,
"step": 16000
},
{
"epoch": 20.2,
"eval_cer": 0.3877551020408163,
"eval_loss": 1.019087791442871,
"eval_runtime": 10.7301,
"eval_samples_per_second": 48.182,
"eval_steps_per_second": 6.058,
"step": 16000
},
{
"epoch": 20.71,
"learning_rate": 0.00014515516149461683,
"loss": 0.3362,
"step": 16400
},
{
"epoch": 20.71,
"eval_cer": 0.38846495119787045,
"eval_loss": 1.0187304019927979,
"eval_runtime": 10.6995,
"eval_samples_per_second": 48.32,
"eval_steps_per_second": 6.075,
"step": 16400
},
{
"epoch": 21.21,
"learning_rate": 0.0001413552881570614,
"loss": 0.354,
"step": 16800
},
{
"epoch": 21.21,
"eval_cer": 0.3881100266193434,
"eval_loss": 1.0370773077011108,
"eval_runtime": 10.6833,
"eval_samples_per_second": 48.393,
"eval_steps_per_second": 6.084,
"step": 16800
},
{
"epoch": 21.72,
"learning_rate": 0.000137555414819506,
"loss": 0.3296,
"step": 17200
},
{
"epoch": 21.72,
"eval_cer": 0.3893522626441881,
"eval_loss": 1.0535281896591187,
"eval_runtime": 10.7771,
"eval_samples_per_second": 47.972,
"eval_steps_per_second": 6.031,
"step": 17200
},
{
"epoch": 22.22,
"learning_rate": 0.00013375554148195058,
"loss": 0.3134,
"step": 17600
},
{
"epoch": 22.22,
"eval_cer": 0.3877551020408163,
"eval_loss": 1.0371551513671875,
"eval_runtime": 10.68,
"eval_samples_per_second": 48.408,
"eval_steps_per_second": 6.086,
"step": 17600
},
{
"epoch": 22.73,
"learning_rate": 0.00012995566814439518,
"loss": 0.3077,
"step": 18000
},
{
"epoch": 22.73,
"eval_cer": 0.39077196095829636,
"eval_loss": 1.0353987216949463,
"eval_runtime": 10.4328,
"eval_samples_per_second": 49.555,
"eval_steps_per_second": 6.23,
"step": 18000
},
{
"epoch": 23.23,
"learning_rate": 0.00012615579480683976,
"loss": 0.289,
"step": 18400
},
{
"epoch": 23.23,
"eval_cer": 0.3934338952972493,
"eval_loss": 1.0498236417770386,
"eval_runtime": 10.6646,
"eval_samples_per_second": 48.478,
"eval_steps_per_second": 6.095,
"step": 18400
},
{
"epoch": 23.74,
"learning_rate": 0.00012235592146928436,
"loss": 0.2753,
"step": 18800
},
{
"epoch": 23.74,
"eval_cer": 0.39023957409050575,
"eval_loss": 1.0461602210998535,
"eval_runtime": 10.685,
"eval_samples_per_second": 48.386,
"eval_steps_per_second": 6.083,
"step": 18800
},
{
"epoch": 24.24,
"learning_rate": 0.00011855604813172893,
"loss": 0.2791,
"step": 19200
},
{
"epoch": 24.24,
"eval_cer": 0.38846495119787045,
"eval_loss": 1.07412588596344,
"eval_runtime": 10.7167,
"eval_samples_per_second": 48.243,
"eval_steps_per_second": 6.065,
"step": 19200
},
{
"epoch": 24.75,
"learning_rate": 0.00011475617479417352,
"loss": 0.2757,
"step": 19600
},
{
"epoch": 24.75,
"eval_cer": 0.385980479148181,
"eval_loss": 1.0546280145645142,
"eval_runtime": 10.5367,
"eval_samples_per_second": 49.067,
"eval_steps_per_second": 6.169,
"step": 19600
},
{
"epoch": 25.25,
"learning_rate": 0.0001109563014566181,
"loss": 0.2533,
"step": 20000
},
{
"epoch": 25.25,
"eval_cer": 0.3817213842058563,
"eval_loss": 1.0429767370224,
"eval_runtime": 10.7598,
"eval_samples_per_second": 48.049,
"eval_steps_per_second": 6.041,
"step": 20000
},
{
"epoch": 25.76,
"learning_rate": 0.00010715642811906269,
"loss": 0.2499,
"step": 20400
},
{
"epoch": 25.76,
"eval_cer": 0.38456078083407275,
"eval_loss": 1.0354866981506348,
"eval_runtime": 10.6619,
"eval_samples_per_second": 48.49,
"eval_steps_per_second": 6.096,
"step": 20400
},
{
"epoch": 26.26,
"learning_rate": 0.00010335655478150728,
"loss": 0.2407,
"step": 20800
},
{
"epoch": 26.26,
"eval_cer": 0.38101153504880214,
"eval_loss": 1.0512378215789795,
"eval_runtime": 10.667,
"eval_samples_per_second": 48.467,
"eval_steps_per_second": 6.094,
"step": 20800
},
{
"epoch": 26.77,
"learning_rate": 9.955668144395185e-05,
"loss": 0.2373,
"step": 21200
},
{
"epoch": 26.77,
"eval_cer": 0.3758651286601597,
"eval_loss": 1.032917857170105,
"eval_runtime": 10.6927,
"eval_samples_per_second": 48.351,
"eval_steps_per_second": 6.079,
"step": 21200
},
{
"epoch": 27.27,
"learning_rate": 9.575680810639644e-05,
"loss": 0.2295,
"step": 21600
},
{
"epoch": 27.27,
"eval_cer": 0.3785270629991127,
"eval_loss": 1.031385064125061,
"eval_runtime": 10.7343,
"eval_samples_per_second": 48.163,
"eval_steps_per_second": 6.055,
"step": 21600
},
{
"epoch": 27.78,
"learning_rate": 9.195693476884103e-05,
"loss": 0.2186,
"step": 22000
},
{
"epoch": 27.78,
"eval_cer": 0.3742679680567879,
"eval_loss": 1.028822422027588,
"eval_runtime": 10.6374,
"eval_samples_per_second": 48.602,
"eval_steps_per_second": 6.111,
"step": 22000
},
{
"epoch": 28.28,
"learning_rate": 8.815706143128561e-05,
"loss": 0.2084,
"step": 22400
},
{
"epoch": 28.28,
"eval_cer": 0.37373558118899736,
"eval_loss": 1.0298017263412476,
"eval_runtime": 10.6689,
"eval_samples_per_second": 48.459,
"eval_steps_per_second": 6.092,
"step": 22400
},
{
"epoch": 28.79,
"learning_rate": 8.43571880937302e-05,
"loss": 0.2066,
"step": 22800
},
{
"epoch": 28.79,
"eval_cer": 0.37497781721384205,
"eval_loss": 1.0195808410644531,
"eval_runtime": 10.4803,
"eval_samples_per_second": 49.331,
"eval_steps_per_second": 6.202,
"step": 22800
},
{
"epoch": 29.29,
"learning_rate": 8.055731475617479e-05,
"loss": 0.1933,
"step": 23200
},
{
"epoch": 29.29,
"eval_cer": 0.380301685891748,
"eval_loss": 1.0443964004516602,
"eval_runtime": 10.6391,
"eval_samples_per_second": 48.594,
"eval_steps_per_second": 6.11,
"step": 23200
},
{
"epoch": 29.8,
"learning_rate": 7.675744141861937e-05,
"loss": 0.1875,
"step": 23600
},
{
"epoch": 29.8,
"eval_cer": 0.3691215616681455,
"eval_loss": 1.0274165868759155,
"eval_runtime": 10.6901,
"eval_samples_per_second": 48.363,
"eval_steps_per_second": 6.08,
"step": 23600
},
{
"epoch": 30.3,
"learning_rate": 7.295756808106396e-05,
"loss": 0.184,
"step": 24000
},
{
"epoch": 30.3,
"eval_cer": 0.37267080745341613,
"eval_loss": 1.0159742832183838,
"eval_runtime": 10.6909,
"eval_samples_per_second": 48.359,
"eval_steps_per_second": 6.08,
"step": 24000
},
{
"epoch": 30.81,
"learning_rate": 6.915769474350855e-05,
"loss": 0.1864,
"step": 24400
},
{
"epoch": 30.81,
"eval_cer": 0.37089618456078083,
"eval_loss": 1.0185551643371582,
"eval_runtime": 10.6616,
"eval_samples_per_second": 48.492,
"eval_steps_per_second": 6.097,
"step": 24400
},
{
"epoch": 31.31,
"learning_rate": 6.535782140595312e-05,
"loss": 0.176,
"step": 24800
},
{
"epoch": 31.31,
"eval_cer": 0.3682342502218279,
"eval_loss": 1.01682710647583,
"eval_runtime": 10.7487,
"eval_samples_per_second": 48.099,
"eval_steps_per_second": 6.047,
"step": 24800
},
{
"epoch": 31.82,
"learning_rate": 6.155794806839771e-05,
"loss": 0.1734,
"step": 25200
},
{
"epoch": 31.82,
"eval_cer": 0.3685891748003549,
"eval_loss": 1.0079487562179565,
"eval_runtime": 10.6916,
"eval_samples_per_second": 48.356,
"eval_steps_per_second": 6.08,
"step": 25200
},
{
"epoch": 32.32,
"learning_rate": 5.7758074730842294e-05,
"loss": 0.1686,
"step": 25600
},
{
"epoch": 32.32,
"eval_cer": 0.37107364685004435,
"eval_loss": 1.0045541524887085,
"eval_runtime": 10.6896,
"eval_samples_per_second": 48.365,
"eval_steps_per_second": 6.081,
"step": 25600
},
{
"epoch": 32.83,
"learning_rate": 5.395820139328688e-05,
"loss": 0.1636,
"step": 26000
},
{
"epoch": 32.83,
"eval_cer": 0.366282165039929,
"eval_loss": 1.0012236833572388,
"eval_runtime": 10.6269,
"eval_samples_per_second": 48.65,
"eval_steps_per_second": 6.117,
"step": 26000
},
{
"epoch": 33.33,
"learning_rate": 5.015832805573147e-05,
"loss": 0.1584,
"step": 26400
},
{
"epoch": 33.33,
"eval_cer": 0.3634427684117125,
"eval_loss": 0.9943842887878418,
"eval_runtime": 10.7058,
"eval_samples_per_second": 48.292,
"eval_steps_per_second": 6.071,
"step": 26400
},
{
"epoch": 33.84,
"learning_rate": 4.635845471817606e-05,
"loss": 0.1592,
"step": 26800
},
{
"epoch": 33.84,
"eval_cer": 0.3678793256433008,
"eval_loss": 0.9912722110748291,
"eval_runtime": 10.6845,
"eval_samples_per_second": 48.388,
"eval_steps_per_second": 6.084,
"step": 26800
},
{
"epoch": 34.34,
"learning_rate": 4.255858138062065e-05,
"loss": 0.1574,
"step": 27200
},
{
"epoch": 34.34,
"eval_cer": 0.36876663708961843,
"eval_loss": 1.0088311433792114,
"eval_runtime": 10.6592,
"eval_samples_per_second": 48.503,
"eval_steps_per_second": 6.098,
"step": 27200
},
{
"epoch": 34.85,
"learning_rate": 3.875870804306523e-05,
"loss": 0.1537,
"step": 27600
},
{
"epoch": 34.85,
"eval_cer": 0.3645075421472937,
"eval_loss": 0.9913118481636047,
"eval_runtime": 11.1744,
"eval_samples_per_second": 46.266,
"eval_steps_per_second": 5.817,
"step": 27600
},
{
"epoch": 35.35,
"learning_rate": 3.495883470550981e-05,
"loss": 0.1461,
"step": 28000
},
{
"epoch": 35.35,
"eval_cer": 0.3634427684117125,
"eval_loss": 0.9954361915588379,
"eval_runtime": 10.4022,
"eval_samples_per_second": 49.701,
"eval_steps_per_second": 6.249,
"step": 28000
},
{
"epoch": 35.86,
"learning_rate": 3.1158961367954396e-05,
"loss": 0.1462,
"step": 28400
},
{
"epoch": 35.86,
"eval_cer": 0.35989352262644186,
"eval_loss": 0.9881103038787842,
"eval_runtime": 10.6493,
"eval_samples_per_second": 48.548,
"eval_steps_per_second": 6.104,
"step": 28400
},
{
"epoch": 36.36,
"learning_rate": 2.7359088030398983e-05,
"loss": 0.1412,
"step": 28800
},
{
"epoch": 36.36,
"eval_cer": 0.3593611357586513,
"eval_loss": 0.9881191849708557,
"eval_runtime": 11.1771,
"eval_samples_per_second": 46.255,
"eval_steps_per_second": 5.815,
"step": 28800
},
{
"epoch": 36.87,
"learning_rate": 2.3559214692843567e-05,
"loss": 0.1382,
"step": 29200
},
{
"epoch": 36.87,
"eval_cer": 0.36184560780834074,
"eval_loss": 0.9879063963890076,
"eval_runtime": 10.6397,
"eval_samples_per_second": 48.592,
"eval_steps_per_second": 6.109,
"step": 29200
},
{
"epoch": 37.37,
"learning_rate": 1.9759341355288154e-05,
"loss": 0.1395,
"step": 29600
},
{
"epoch": 37.37,
"eval_cer": 0.3582963620230701,
"eval_loss": 0.9859166145324707,
"eval_runtime": 10.6674,
"eval_samples_per_second": 48.465,
"eval_steps_per_second": 6.093,
"step": 29600
},
{
"epoch": 37.88,
"learning_rate": 1.595946801773274e-05,
"loss": 0.1375,
"step": 30000
},
{
"epoch": 37.88,
"eval_cer": 0.36024844720496896,
"eval_loss": 0.9944302439689636,
"eval_runtime": 10.7121,
"eval_samples_per_second": 48.263,
"eval_steps_per_second": 6.068,
"step": 30000
},
{
"epoch": 38.38,
"learning_rate": 1.2159594680177326e-05,
"loss": 0.1277,
"step": 30400
},
{
"epoch": 38.38,
"eval_cer": 0.3611357586512866,
"eval_loss": 0.9873452186584473,
"eval_runtime": 10.6816,
"eval_samples_per_second": 48.401,
"eval_steps_per_second": 6.085,
"step": 30400
},
{
"epoch": 38.89,
"learning_rate": 8.359721342621911e-06,
"loss": 0.1356,
"step": 30800
},
{
"epoch": 38.89,
"eval_cer": 0.36007098491570544,
"eval_loss": 0.9833679795265198,
"eval_runtime": 10.6711,
"eval_samples_per_second": 48.449,
"eval_steps_per_second": 6.091,
"step": 30800
},
{
"epoch": 59.09,
"learning_rate": 7.859515899383008e-05,
"loss": 0.141,
"step": 31200
},
{
"epoch": 59.09,
"eval_cer": 0.36539485359361135,
"eval_loss": 1.0076383352279663,
"eval_runtime": 11.8848,
"eval_samples_per_second": 43.501,
"eval_steps_per_second": 5.469,
"step": 31200
},
{
"epoch": 59.85,
"learning_rate": 7.574750830564784e-05,
"loss": 0.1391,
"step": 31600
},
{
"epoch": 59.85,
"eval_cer": 0.363265306122449,
"eval_loss": 1.0228257179260254,
"eval_runtime": 10.7532,
"eval_samples_per_second": 48.079,
"eval_steps_per_second": 6.045,
"step": 31600
},
{
"epoch": 60.61,
"learning_rate": 7.289985761746559e-05,
"loss": 0.1444,
"step": 32000
},
{
"epoch": 60.61,
"eval_cer": 0.36876663708961843,
"eval_loss": 1.0302114486694336,
"eval_runtime": 10.5859,
"eval_samples_per_second": 48.838,
"eval_steps_per_second": 6.14,
"step": 32000
},
{
"epoch": 61.36,
"learning_rate": 7.005220692928333e-05,
"loss": 0.1396,
"step": 32400
},
{
"epoch": 61.36,
"eval_cer": 0.3634427684117125,
"eval_loss": 1.0219813585281372,
"eval_runtime": 10.7349,
"eval_samples_per_second": 48.161,
"eval_steps_per_second": 6.055,
"step": 32400
},
{
"epoch": 62.12,
"learning_rate": 6.720455624110109e-05,
"loss": 0.1383,
"step": 32800
},
{
"epoch": 62.12,
"eval_cer": 0.3625554569653949,
"eval_loss": 1.0074561834335327,
"eval_runtime": 10.7365,
"eval_samples_per_second": 48.154,
"eval_steps_per_second": 6.054,
"step": 32800
},
{
"epoch": 62.88,
"learning_rate": 6.435690555291883e-05,
"loss": 0.1338,
"step": 33200
},
{
"epoch": 62.88,
"eval_cer": 0.36131322094055013,
"eval_loss": 1.009969711303711,
"eval_runtime": 10.8491,
"eval_samples_per_second": 47.654,
"eval_steps_per_second": 5.991,
"step": 33200
},
{
"epoch": 63.64,
"learning_rate": 6.150925486473658e-05,
"loss": 0.1322,
"step": 33600
},
{
"epoch": 63.64,
"eval_cer": 0.35989352262644186,
"eval_loss": 1.0064263343811035,
"eval_runtime": 10.7017,
"eval_samples_per_second": 48.31,
"eval_steps_per_second": 6.074,
"step": 33600
},
{
"epoch": 64.39,
"learning_rate": 5.866160417655434e-05,
"loss": 0.1313,
"step": 34000
},
{
"epoch": 64.39,
"eval_cer": 0.3611357586512866,
"eval_loss": 1.0025349855422974,
"eval_runtime": 10.8036,
"eval_samples_per_second": 47.855,
"eval_steps_per_second": 6.017,
"step": 34000
},
{
"epoch": 65.15,
"learning_rate": 5.581395348837209e-05,
"loss": 0.1275,
"step": 34400
},
{
"epoch": 65.15,
"eval_cer": 0.3625554569653949,
"eval_loss": 0.9986574649810791,
"eval_runtime": 10.7283,
"eval_samples_per_second": 48.19,
"eval_steps_per_second": 6.059,
"step": 34400
},
{
"epoch": 65.91,
"learning_rate": 5.296630280018984e-05,
"loss": 0.125,
"step": 34800
},
{
"epoch": 65.91,
"eval_cer": 0.36574977817213844,
"eval_loss": 1.010204553604126,
"eval_runtime": 10.8039,
"eval_samples_per_second": 47.853,
"eval_steps_per_second": 6.016,
"step": 34800
},
{
"epoch": 66.67,
"learning_rate": 5.011865211200759e-05,
"loss": 0.121,
"step": 35200
},
{
"epoch": 66.67,
"eval_cer": 0.36308784383318543,
"eval_loss": 1.0088319778442383,
"eval_runtime": 10.4383,
"eval_samples_per_second": 49.529,
"eval_steps_per_second": 6.227,
"step": 35200
},
{
"epoch": 67.42,
"learning_rate": 4.727100142382534e-05,
"loss": 0.1247,
"step": 35600
},
{
"epoch": 67.42,
"eval_cer": 0.3648624667258208,
"eval_loss": 1.0154913663864136,
"eval_runtime": 10.7106,
"eval_samples_per_second": 48.27,
"eval_steps_per_second": 6.069,
"step": 35600
},
{
"epoch": 68.18,
"learning_rate": 4.442335073564309e-05,
"loss": 0.1164,
"step": 36000
},
{
"epoch": 68.18,
"eval_cer": 0.3622005323868678,
"eval_loss": 0.9949304461479187,
"eval_runtime": 10.713,
"eval_samples_per_second": 48.259,
"eval_steps_per_second": 6.067,
"step": 36000
},
{
"epoch": 68.94,
"learning_rate": 4.157570004746084e-05,
"loss": 0.1112,
"step": 36400
},
{
"epoch": 68.94,
"eval_cer": 0.3609582963620231,
"eval_loss": 1.00165593624115,
"eval_runtime": 10.7166,
"eval_samples_per_second": 48.243,
"eval_steps_per_second": 6.065,
"step": 36400
},
{
"epoch": 69.7,
"learning_rate": 3.872804935927859e-05,
"loss": 0.1143,
"step": 36800
},
{
"epoch": 69.7,
"eval_cer": 0.3595385980479148,
"eval_loss": 0.9980924725532532,
"eval_runtime": 10.7622,
"eval_samples_per_second": 48.038,
"eval_steps_per_second": 6.04,
"step": 36800
},
{
"epoch": 70.45,
"learning_rate": 3.588039867109634e-05,
"loss": 0.109,
"step": 37200
},
{
"epoch": 70.45,
"eval_cer": 0.3604259094942325,
"eval_loss": 1.001591682434082,
"eval_runtime": 10.6962,
"eval_samples_per_second": 48.335,
"eval_steps_per_second": 6.077,
"step": 37200
},
{
"epoch": 71.21,
"learning_rate": 3.303274798291409e-05,
"loss": 0.1066,
"step": 37600
},
{
"epoch": 71.21,
"eval_cer": 0.35918367346938773,
"eval_loss": 0.9884746074676514,
"eval_runtime": 10.6604,
"eval_samples_per_second": 48.497,
"eval_steps_per_second": 6.097,
"step": 37600
},
{
"epoch": 71.97,
"learning_rate": 3.0185097294731845e-05,
"loss": 0.1042,
"step": 38000
},
{
"epoch": 71.97,
"eval_cer": 0.36007098491570544,
"eval_loss": 0.9990329742431641,
"eval_runtime": 10.7259,
"eval_samples_per_second": 48.201,
"eval_steps_per_second": 6.06,
"step": 38000
},
{
"epoch": 72.73,
"learning_rate": 2.7337446606549593e-05,
"loss": 0.1024,
"step": 38400
},
{
"epoch": 72.73,
"eval_cer": 0.36007098491570544,
"eval_loss": 0.9916397333145142,
"eval_runtime": 10.721,
"eval_samples_per_second": 48.223,
"eval_steps_per_second": 6.063,
"step": 38400
},
{
"epoch": 73.48,
"learning_rate": 2.448979591836734e-05,
"loss": 0.1064,
"step": 38800
},
{
"epoch": 73.48,
"eval_cer": 0.35811889973380656,
"eval_loss": 0.9944778084754944,
"eval_runtime": 10.7694,
"eval_samples_per_second": 48.006,
"eval_steps_per_second": 6.036,
"step": 38800
},
{
"epoch": 74.24,
"learning_rate": 2.1642145230185097e-05,
"loss": 0.1019,
"step": 39200
},
{
"epoch": 74.24,
"eval_cer": 0.3566992014196983,
"eval_loss": 0.9997159838676453,
"eval_runtime": 10.7395,
"eval_samples_per_second": 48.14,
"eval_steps_per_second": 6.052,
"step": 39200
},
{
"epoch": 75.0,
"learning_rate": 1.8794494542002845e-05,
"loss": 0.0977,
"step": 39600
},
{
"epoch": 75.0,
"eval_cer": 0.35616681455190774,
"eval_loss": 0.9909945130348206,
"eval_runtime": 10.7111,
"eval_samples_per_second": 48.268,
"eval_steps_per_second": 6.068,
"step": 39600
},
{
"epoch": 75.76,
"learning_rate": 1.5946843853820597e-05,
"loss": 0.097,
"step": 40000
},
{
"epoch": 75.76,
"eval_cer": 0.35598935226264417,
"eval_loss": 0.9969141483306885,
"eval_runtime": 10.7789,
"eval_samples_per_second": 47.964,
"eval_steps_per_second": 6.03,
"step": 40000
}
],
"max_steps": 42240,
"num_train_epochs": 80,
"total_flos": 9.124217746582361e+19,
"trial_name": null,
"trial_params": null
}