xls-r-greek-aivaliot / trainer_state.json
ctsoukala's picture
Upload 12 files
f418d0b verified
{
"best_metric": 0.7119987566055331,
"best_model_checkpoint": "xls-r-greek-aivaliot/checkpoint-14196",
"epoch": 35.0,
"eval_steps": 500,
"global_step": 19110,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.37,
"learning_rate": 0.00011999999999999999,
"loss": 6.1718,
"step": 200
},
{
"epoch": 0.73,
"learning_rate": 0.00023999999999999998,
"loss": 3.1837,
"step": 400
},
{
"epoch": 1.0,
"eval_cer": 0.6650782444993528,
"eval_loss": 2.3546931743621826,
"eval_runtime": 42.745,
"eval_samples_per_second": 25.547,
"eval_steps_per_second": 3.205,
"eval_wer": 0.972023624494871,
"step": 546
},
{
"epoch": 1.1,
"learning_rate": 0.00029838796346050506,
"loss": 2.2974,
"step": 600
},
{
"epoch": 1.47,
"learning_rate": 0.0002951638903815153,
"loss": 2.0321,
"step": 800
},
{
"epoch": 1.83,
"learning_rate": 0.00029193981730252553,
"loss": 1.9583,
"step": 1000
},
{
"epoch": 2.0,
"eval_cer": 0.45720084715848924,
"eval_loss": 1.9236657619476318,
"eval_runtime": 44.6726,
"eval_samples_per_second": 24.444,
"eval_steps_per_second": 3.067,
"eval_wer": 0.8734846129934721,
"step": 1092
},
{
"epoch": 2.2,
"learning_rate": 0.0002887157442235357,
"loss": 1.7559,
"step": 1200
},
{
"epoch": 2.56,
"learning_rate": 0.00028549167114454594,
"loss": 1.6492,
"step": 1400
},
{
"epoch": 2.93,
"learning_rate": 0.0002822675980655561,
"loss": 1.6148,
"step": 1600
},
{
"epoch": 3.0,
"eval_cer": 0.450847158489234,
"eval_loss": 1.817557692527771,
"eval_runtime": 41.1726,
"eval_samples_per_second": 26.522,
"eval_steps_per_second": 3.327,
"eval_wer": 0.8234379857009636,
"step": 1638
},
{
"epoch": 3.3,
"learning_rate": 0.00027904352498656635,
"loss": 1.421,
"step": 1800
},
{
"epoch": 3.66,
"learning_rate": 0.0002758194519075766,
"loss": 1.423,
"step": 2000
},
{
"epoch": 4.0,
"eval_cer": 0.4232556771384869,
"eval_loss": 1.8342238664627075,
"eval_runtime": 39.2094,
"eval_samples_per_second": 27.85,
"eval_steps_per_second": 3.494,
"eval_wer": 0.820329499533727,
"step": 2184
},
{
"epoch": 4.03,
"learning_rate": 0.00027259537882858676,
"loss": 1.4186,
"step": 2200
},
{
"epoch": 4.4,
"learning_rate": 0.000269371305749597,
"loss": 1.2185,
"step": 2400
},
{
"epoch": 4.76,
"learning_rate": 0.00026614723267060717,
"loss": 1.2324,
"step": 2600
},
{
"epoch": 5.0,
"eval_cer": 0.4242852100247088,
"eval_loss": 1.7897675037384033,
"eval_runtime": 45.221,
"eval_samples_per_second": 24.148,
"eval_steps_per_second": 3.03,
"eval_wer": 0.8047870686975443,
"step": 2730
},
{
"epoch": 5.13,
"learning_rate": 0.00026292315959161735,
"loss": 1.135,
"step": 2800
},
{
"epoch": 5.49,
"learning_rate": 0.0002596990865126276,
"loss": 1.0774,
"step": 3000
},
{
"epoch": 5.86,
"learning_rate": 0.0002564750134336378,
"loss": 1.0871,
"step": 3200
},
{
"epoch": 6.0,
"eval_cer": 0.40566537239675254,
"eval_loss": 1.783974051475525,
"eval_runtime": 39.7961,
"eval_samples_per_second": 27.44,
"eval_steps_per_second": 3.443,
"eval_wer": 0.7671743860739819,
"step": 3276
},
{
"epoch": 6.23,
"learning_rate": 0.000253250940354648,
"loss": 1.0111,
"step": 3400
},
{
"epoch": 6.59,
"learning_rate": 0.0002500268672756582,
"loss": 0.9552,
"step": 3600
},
{
"epoch": 6.96,
"learning_rate": 0.00024680279419666846,
"loss": 0.9449,
"step": 3800
},
{
"epoch": 7.0,
"eval_cer": 0.3969584657018473,
"eval_loss": 1.9680403470993042,
"eval_runtime": 41.0562,
"eval_samples_per_second": 26.598,
"eval_steps_per_second": 3.337,
"eval_wer": 0.7777432390425862,
"step": 3822
},
{
"epoch": 7.33,
"learning_rate": 0.00024357872111767866,
"loss": 0.8435,
"step": 4000
},
{
"epoch": 7.69,
"learning_rate": 0.00024035464803868887,
"loss": 0.8168,
"step": 4200
},
{
"epoch": 8.0,
"eval_cer": 0.4033415695964231,
"eval_loss": 1.956693172454834,
"eval_runtime": 38.8124,
"eval_samples_per_second": 28.135,
"eval_steps_per_second": 3.53,
"eval_wer": 0.7690394777743239,
"step": 4368
},
{
"epoch": 8.06,
"learning_rate": 0.00023713057495969905,
"loss": 0.8366,
"step": 4400
},
{
"epoch": 8.42,
"learning_rate": 0.00023390650188070925,
"loss": 0.728,
"step": 4600
},
{
"epoch": 8.79,
"learning_rate": 0.00023068242880171949,
"loss": 0.7254,
"step": 4800
},
{
"epoch": 9.0,
"eval_cer": 0.408224496999647,
"eval_loss": 2.184379816055298,
"eval_runtime": 39.1148,
"eval_samples_per_second": 27.918,
"eval_steps_per_second": 3.503,
"eval_wer": 0.7878458190861051,
"step": 4914
},
{
"epoch": 9.16,
"learning_rate": 0.0002274583557227297,
"loss": 0.7016,
"step": 5000
},
{
"epoch": 9.52,
"learning_rate": 0.0002242342826437399,
"loss": 0.6563,
"step": 5200
},
{
"epoch": 9.89,
"learning_rate": 0.0002210102095647501,
"loss": 0.6584,
"step": 5400
},
{
"epoch": 10.0,
"eval_cer": 0.3924579362277915,
"eval_loss": 2.313096046447754,
"eval_runtime": 38.9682,
"eval_samples_per_second": 28.023,
"eval_steps_per_second": 3.516,
"eval_wer": 0.7681069319241529,
"step": 5460
},
{
"epoch": 10.26,
"learning_rate": 0.0002177861364857603,
"loss": 0.6053,
"step": 5600
},
{
"epoch": 10.62,
"learning_rate": 0.00021456206340677054,
"loss": 0.5844,
"step": 5800
},
{
"epoch": 10.99,
"learning_rate": 0.00021133799032778075,
"loss": 0.5825,
"step": 6000
},
{
"epoch": 11.0,
"eval_cer": 0.39419343452170846,
"eval_loss": 2.4024157524108887,
"eval_runtime": 38.7244,
"eval_samples_per_second": 28.199,
"eval_steps_per_second": 3.538,
"eval_wer": 0.7614236866645944,
"step": 6006
},
{
"epoch": 11.36,
"learning_rate": 0.00020811391724879095,
"loss": 0.4944,
"step": 6200
},
{
"epoch": 11.72,
"learning_rate": 0.00020488984416980116,
"loss": 0.5214,
"step": 6400
},
{
"epoch": 12.0,
"eval_cer": 0.37542652076714905,
"eval_loss": 2.2336461544036865,
"eval_runtime": 39.0775,
"eval_samples_per_second": 27.944,
"eval_steps_per_second": 3.506,
"eval_wer": 0.7415293751942804,
"step": 6552
},
{
"epoch": 12.09,
"learning_rate": 0.0002016657710908114,
"loss": 0.4977,
"step": 6600
},
{
"epoch": 12.45,
"learning_rate": 0.0001984416980118216,
"loss": 0.4326,
"step": 6800
},
{
"epoch": 12.82,
"learning_rate": 0.0001952176249328318,
"loss": 0.4618,
"step": 7000
},
{
"epoch": 13.0,
"eval_cer": 0.38816331333098014,
"eval_loss": 2.4235401153564453,
"eval_runtime": 38.6611,
"eval_samples_per_second": 28.245,
"eval_steps_per_second": 3.544,
"eval_wer": 0.7475909232203917,
"step": 7098
},
{
"epoch": 13.19,
"learning_rate": 0.000191993551853842,
"loss": 0.4269,
"step": 7200
},
{
"epoch": 13.55,
"learning_rate": 0.0001887694787748522,
"loss": 0.3915,
"step": 7400
},
{
"epoch": 13.92,
"learning_rate": 0.00018554540569586244,
"loss": 0.4034,
"step": 7600
},
{
"epoch": 14.0,
"eval_cer": 0.38966348982233207,
"eval_loss": 2.432621479034424,
"eval_runtime": 39.0269,
"eval_samples_per_second": 27.981,
"eval_steps_per_second": 3.51,
"eval_wer": 0.7384208890270438,
"step": 7644
},
{
"epoch": 14.29,
"learning_rate": 0.00018232133261687265,
"loss": 0.3616,
"step": 7800
},
{
"epoch": 14.65,
"learning_rate": 0.00017909725953788285,
"loss": 0.3638,
"step": 8000
},
{
"epoch": 15.0,
"eval_cer": 0.38351570773032123,
"eval_loss": 2.5917038917541504,
"eval_runtime": 38.9489,
"eval_samples_per_second": 28.037,
"eval_steps_per_second": 3.517,
"eval_wer": 0.7409076779608331,
"step": 8190
},
{
"epoch": 15.02,
"learning_rate": 0.00017587318645889303,
"loss": 0.3469,
"step": 8200
},
{
"epoch": 15.38,
"learning_rate": 0.00017264911337990324,
"loss": 0.2989,
"step": 8400
},
{
"epoch": 15.75,
"learning_rate": 0.0001694250403009135,
"loss": 0.307,
"step": 8600
},
{
"epoch": 16.0,
"eval_cer": 0.38233909871749616,
"eval_loss": 2.653606414794922,
"eval_runtime": 38.6877,
"eval_samples_per_second": 28.226,
"eval_steps_per_second": 3.541,
"eval_wer": 0.7483680447622008,
"step": 8736
},
{
"epoch": 16.12,
"learning_rate": 0.00016620096722192368,
"loss": 0.3028,
"step": 8800
},
{
"epoch": 16.48,
"learning_rate": 0.00016297689414293388,
"loss": 0.2857,
"step": 9000
},
{
"epoch": 16.85,
"learning_rate": 0.00015975282106394409,
"loss": 0.2794,
"step": 9200
},
{
"epoch": 17.0,
"eval_cer": 0.3855159430521238,
"eval_loss": 2.7668089866638184,
"eval_runtime": 38.9844,
"eval_samples_per_second": 28.011,
"eval_steps_per_second": 3.514,
"eval_wer": 0.7413739508859185,
"step": 9282
},
{
"epoch": 17.22,
"learning_rate": 0.00015652874798495432,
"loss": 0.2484,
"step": 9400
},
{
"epoch": 17.58,
"learning_rate": 0.00015330467490596452,
"loss": 0.245,
"step": 9600
},
{
"epoch": 17.95,
"learning_rate": 0.00015008060182697473,
"loss": 0.2445,
"step": 9800
},
{
"epoch": 18.0,
"eval_cer": 0.3990469466996117,
"eval_loss": 2.955599069595337,
"eval_runtime": 38.9844,
"eval_samples_per_second": 28.011,
"eval_steps_per_second": 3.514,
"eval_wer": 0.7597140192726143,
"step": 9828
},
{
"epoch": 18.32,
"learning_rate": 0.00014685652874798494,
"loss": 0.208,
"step": 10000
},
{
"epoch": 18.68,
"learning_rate": 0.00014363245566899517,
"loss": 0.2209,
"step": 10200
},
{
"epoch": 19.0,
"eval_cer": 0.38439816448994,
"eval_loss": 2.9723663330078125,
"eval_runtime": 38.8984,
"eval_samples_per_second": 28.073,
"eval_steps_per_second": 3.522,
"eval_wer": 0.7396642834939384,
"step": 10374
},
{
"epoch": 19.05,
"learning_rate": 0.00014040838259000535,
"loss": 0.2107,
"step": 10400
},
{
"epoch": 19.41,
"learning_rate": 0.00013718430951101558,
"loss": 0.1935,
"step": 10600
},
{
"epoch": 19.78,
"learning_rate": 0.00013396023643202578,
"loss": 0.193,
"step": 10800
},
{
"epoch": 20.0,
"eval_cer": 0.3829568184492293,
"eval_loss": 3.105555534362793,
"eval_runtime": 38.7297,
"eval_samples_per_second": 28.195,
"eval_steps_per_second": 3.537,
"eval_wer": 0.7412185265775567,
"step": 10920
},
{
"epoch": 20.15,
"learning_rate": 0.000130736163353036,
"loss": 0.1764,
"step": 11000
},
{
"epoch": 20.51,
"learning_rate": 0.0001275120902740462,
"loss": 0.1683,
"step": 11200
},
{
"epoch": 20.88,
"learning_rate": 0.0001242880171950564,
"loss": 0.1608,
"step": 11400
},
{
"epoch": 21.0,
"eval_cer": 0.3850452994469938,
"eval_loss": 3.2178432941436768,
"eval_runtime": 38.643,
"eval_samples_per_second": 28.259,
"eval_steps_per_second": 3.545,
"eval_wer": 0.7332918868511035,
"step": 11466
},
{
"epoch": 21.25,
"learning_rate": 0.00012106394411606662,
"loss": 0.1514,
"step": 11600
},
{
"epoch": 21.61,
"learning_rate": 0.00011783987103707682,
"loss": 0.1421,
"step": 11800
},
{
"epoch": 21.98,
"learning_rate": 0.00011461579795808704,
"loss": 0.1454,
"step": 12000
},
{
"epoch": 22.0,
"eval_cer": 0.3789563478056242,
"eval_loss": 3.3512697219848633,
"eval_runtime": 38.841,
"eval_samples_per_second": 28.115,
"eval_steps_per_second": 3.527,
"eval_wer": 0.7322039166925707,
"step": 12012
},
{
"epoch": 22.34,
"learning_rate": 0.00011139172487909725,
"loss": 0.1378,
"step": 12200
},
{
"epoch": 22.71,
"learning_rate": 0.00010816765180010745,
"loss": 0.1269,
"step": 12400
},
{
"epoch": 23.0,
"eval_cer": 0.38342746205435935,
"eval_loss": 3.4092280864715576,
"eval_runtime": 39.0183,
"eval_samples_per_second": 27.987,
"eval_steps_per_second": 3.511,
"eval_wer": 0.7311159465340379,
"step": 12558
},
{
"epoch": 23.08,
"learning_rate": 0.00010494357872111767,
"loss": 0.1334,
"step": 12600
},
{
"epoch": 23.44,
"learning_rate": 0.00010171950564212788,
"loss": 0.1069,
"step": 12800
},
{
"epoch": 23.81,
"learning_rate": 9.84954325631381e-05,
"loss": 0.114,
"step": 13000
},
{
"epoch": 24.0,
"eval_cer": 0.3863101541357807,
"eval_loss": 3.3670547008514404,
"eval_runtime": 38.7589,
"eval_samples_per_second": 28.174,
"eval_steps_per_second": 3.535,
"eval_wer": 0.7300279763755051,
"step": 13104
},
{
"epoch": 24.18,
"learning_rate": 9.527135948414829e-05,
"loss": 0.1064,
"step": 13200
},
{
"epoch": 24.54,
"learning_rate": 9.204728640515852e-05,
"loss": 0.1056,
"step": 13400
},
{
"epoch": 24.91,
"learning_rate": 8.882321332616871e-05,
"loss": 0.1086,
"step": 13600
},
{
"epoch": 25.0,
"eval_cer": 0.38175079421108365,
"eval_loss": 3.6167728900909424,
"eval_runtime": 38.7877,
"eval_samples_per_second": 28.153,
"eval_steps_per_second": 3.532,
"eval_wer": 0.7179048803232826,
"step": 13650
},
{
"epoch": 25.27,
"learning_rate": 8.559914024717892e-05,
"loss": 0.0853,
"step": 13800
},
{
"epoch": 25.64,
"learning_rate": 8.237506716818914e-05,
"loss": 0.0877,
"step": 14000
},
{
"epoch": 26.0,
"eval_cer": 0.3746617249088128,
"eval_loss": 3.5180928707122803,
"eval_runtime": 38.7044,
"eval_samples_per_second": 28.214,
"eval_steps_per_second": 3.54,
"eval_wer": 0.7119987566055331,
"step": 14196
},
{
"epoch": 26.01,
"learning_rate": 7.915099408919934e-05,
"loss": 0.0966,
"step": 14200
},
{
"epoch": 26.37,
"learning_rate": 7.592692101020956e-05,
"loss": 0.0855,
"step": 14400
},
{
"epoch": 26.74,
"learning_rate": 7.270284793121977e-05,
"loss": 0.0856,
"step": 14600
},
{
"epoch": 27.0,
"eval_cer": 0.39136957289092833,
"eval_loss": 3.875309467315674,
"eval_runtime": 38.7918,
"eval_samples_per_second": 28.15,
"eval_steps_per_second": 3.532,
"eval_wer": 0.7429281939695368,
"step": 14742
},
{
"epoch": 27.11,
"learning_rate": 6.947877485222997e-05,
"loss": 0.081,
"step": 14800
},
{
"epoch": 27.47,
"learning_rate": 6.625470177324019e-05,
"loss": 0.0708,
"step": 15000
},
{
"epoch": 27.84,
"learning_rate": 6.30306286942504e-05,
"loss": 0.0734,
"step": 15200
},
{
"epoch": 28.0,
"eval_cer": 0.37516178373926345,
"eval_loss": 3.8759613037109375,
"eval_runtime": 38.881,
"eval_samples_per_second": 28.086,
"eval_steps_per_second": 3.524,
"eval_wer": 0.7227230338824993,
"step": 15288
},
{
"epoch": 28.21,
"learning_rate": 5.980655561526061e-05,
"loss": 0.0734,
"step": 15400
},
{
"epoch": 28.57,
"learning_rate": 5.658248253627082e-05,
"loss": 0.0642,
"step": 15600
},
{
"epoch": 28.94,
"learning_rate": 5.335840945728102e-05,
"loss": 0.0702,
"step": 15800
},
{
"epoch": 29.0,
"eval_cer": 0.37551476644311094,
"eval_loss": 3.9142181873321533,
"eval_runtime": 38.8332,
"eval_samples_per_second": 28.12,
"eval_steps_per_second": 3.528,
"eval_wer": 0.723811004041032,
"step": 15834
},
{
"epoch": 29.3,
"learning_rate": 5.0134336378291234e-05,
"loss": 0.0617,
"step": 16000
},
{
"epoch": 29.67,
"learning_rate": 4.6910263299301446e-05,
"loss": 0.0661,
"step": 16200
},
{
"epoch": 30.0,
"eval_cer": 0.3737498529238734,
"eval_loss": 4.010254859924316,
"eval_runtime": 38.9811,
"eval_samples_per_second": 28.014,
"eval_steps_per_second": 3.515,
"eval_wer": 0.7196145477152627,
"step": 16380
},
{
"epoch": 30.04,
"learning_rate": 4.368619022031166e-05,
"loss": 0.0559,
"step": 16400
},
{
"epoch": 30.4,
"learning_rate": 4.046211714132187e-05,
"loss": 0.0575,
"step": 16600
},
{
"epoch": 30.77,
"learning_rate": 3.7238044062332076e-05,
"loss": 0.0569,
"step": 16800
},
{
"epoch": 31.0,
"eval_cer": 0.3768090363572185,
"eval_loss": 4.117391109466553,
"eval_runtime": 39.1729,
"eval_samples_per_second": 27.876,
"eval_steps_per_second": 3.497,
"eval_wer": 0.729250854833696,
"step": 16926
},
{
"epoch": 31.14,
"learning_rate": 3.401397098334229e-05,
"loss": 0.0559,
"step": 17000
},
{
"epoch": 31.5,
"learning_rate": 3.078989790435249e-05,
"loss": 0.0501,
"step": 17200
},
{
"epoch": 31.87,
"learning_rate": 2.756582482536271e-05,
"loss": 0.0494,
"step": 17400
},
{
"epoch": 32.0,
"eval_cer": 0.37404400517707964,
"eval_loss": 4.161037921905518,
"eval_runtime": 38.9916,
"eval_samples_per_second": 28.006,
"eval_steps_per_second": 3.514,
"eval_wer": 0.7207025178737955,
"step": 17472
},
{
"epoch": 32.23,
"learning_rate": 2.4341751746372914e-05,
"loss": 0.048,
"step": 17600
},
{
"epoch": 32.6,
"learning_rate": 2.1117678667383127e-05,
"loss": 0.0509,
"step": 17800
},
{
"epoch": 32.97,
"learning_rate": 1.7893605588393335e-05,
"loss": 0.0493,
"step": 18000
},
{
"epoch": 33.0,
"eval_cer": 0.37219084598188024,
"eval_loss": 4.1340861320495605,
"eval_runtime": 38.9947,
"eval_samples_per_second": 28.004,
"eval_steps_per_second": 3.513,
"eval_wer": 0.7165060615480261,
"step": 18018
},
{
"epoch": 33.33,
"learning_rate": 1.4669532509403546e-05,
"loss": 0.0415,
"step": 18200
},
{
"epoch": 33.7,
"learning_rate": 1.1445459430413755e-05,
"loss": 0.0395,
"step": 18400
},
{
"epoch": 34.0,
"eval_cer": 0.372132015531239,
"eval_loss": 4.2251057624816895,
"eval_runtime": 38.8607,
"eval_samples_per_second": 28.1,
"eval_steps_per_second": 3.525,
"eval_wer": 0.7135529996891514,
"step": 18564
},
{
"epoch": 34.07,
"learning_rate": 8.221386351423965e-06,
"loss": 0.0472,
"step": 18600
},
{
"epoch": 34.43,
"learning_rate": 4.997313272434174e-06,
"loss": 0.037,
"step": 18800
},
{
"epoch": 34.8,
"learning_rate": 1.7732401934443846e-06,
"loss": 0.0377,
"step": 19000
},
{
"epoch": 35.0,
"eval_cer": 0.3725438286857277,
"eval_loss": 4.262682914733887,
"eval_runtime": 38.6349,
"eval_samples_per_second": 28.265,
"eval_steps_per_second": 3.546,
"eval_wer": 0.715728940006217,
"step": 19110
}
],
"logging_steps": 200,
"max_steps": 19110,
"num_input_tokens_seen": 0,
"num_train_epochs": 35,
"save_steps": 500,
"total_flos": 2.6904981744760697e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}