{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.1938677738741617,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.015969338869370808,
"grad_norm": 17.92350196838379,
"learning_rate": 4.800000000000001e-07,
"loss": 3.223,
"step": 25
},
{
"epoch": 0.031938677738741615,
"grad_norm": 11.101584434509277,
"learning_rate": 9.800000000000001e-07,
"loss": 2.5321,
"step": 50
},
{
"epoch": 0.047908016608112426,
"grad_norm": 9.936028480529785,
"learning_rate": 1.48e-06,
"loss": 2.0032,
"step": 75
},
{
"epoch": 0.06387735547748323,
"grad_norm": 10.770648956298828,
"learning_rate": 1.98e-06,
"loss": 1.6134,
"step": 100
},
{
"epoch": 0.07984669434685404,
"grad_norm": 10.68463134765625,
"learning_rate": 2.4800000000000004e-06,
"loss": 1.5694,
"step": 125
},
{
"epoch": 0.09581603321622485,
"grad_norm": 10.635124206542969,
"learning_rate": 2.9800000000000003e-06,
"loss": 1.4934,
"step": 150
},
{
"epoch": 0.11178537208559565,
"grad_norm": 9.225995063781738,
"learning_rate": 3.48e-06,
"loss": 1.518,
"step": 175
},
{
"epoch": 0.12775471095496646,
"grad_norm": 9.05696964263916,
"learning_rate": 3.980000000000001e-06,
"loss": 1.4258,
"step": 200
},
{
"epoch": 0.14372404982433729,
"grad_norm": 9.599803924560547,
"learning_rate": 4.48e-06,
"loss": 1.422,
"step": 225
},
{
"epoch": 0.15969338869370808,
"grad_norm": 9.442488670349121,
"learning_rate": 4.980000000000001e-06,
"loss": 1.3208,
"step": 250
},
{
"epoch": 0.17566272756307888,
"grad_norm": 10.746581077575684,
"learning_rate": 5.480000000000001e-06,
"loss": 1.3997,
"step": 275
},
{
"epoch": 0.1916320664324497,
"grad_norm": 9.297645568847656,
"learning_rate": 5.98e-06,
"loss": 1.3343,
"step": 300
},
{
"epoch": 0.2076014053018205,
"grad_norm": 10.026762962341309,
"learning_rate": 6.480000000000001e-06,
"loss": 1.3353,
"step": 325
},
{
"epoch": 0.2235707441711913,
"grad_norm": 9.392842292785645,
"learning_rate": 6.98e-06,
"loss": 1.3451,
"step": 350
},
{
"epoch": 0.23954008304056212,
"grad_norm": 7.376865386962891,
"learning_rate": 7.48e-06,
"loss": 1.2645,
"step": 375
},
{
"epoch": 0.2555094219099329,
"grad_norm": 10.001627922058105,
"learning_rate": 7.980000000000002e-06,
"loss": 1.271,
"step": 400
},
{
"epoch": 0.2714787607793037,
"grad_norm": 7.699875354766846,
"learning_rate": 8.48e-06,
"loss": 1.2116,
"step": 425
},
{
"epoch": 0.28744809964867457,
"grad_norm": 8.733208656311035,
"learning_rate": 8.98e-06,
"loss": 1.2222,
"step": 450
},
{
"epoch": 0.30341743851804537,
"grad_norm": 8.286600112915039,
"learning_rate": 9.48e-06,
"loss": 1.2134,
"step": 475
},
{
"epoch": 0.31938677738741617,
"grad_norm": 7.408142566680908,
"learning_rate": 9.980000000000001e-06,
"loss": 1.1662,
"step": 500
},
{
"epoch": 0.33535611625678696,
"grad_norm": 9.274364471435547,
"learning_rate": 9.946666666666667e-06,
"loss": 1.2752,
"step": 525
},
{
"epoch": 0.35132545512615776,
"grad_norm": 8.4090576171875,
"learning_rate": 9.891111111111113e-06,
"loss": 1.2137,
"step": 550
},
{
"epoch": 0.3672947939955286,
"grad_norm": 8.74751091003418,
"learning_rate": 9.835555555555556e-06,
"loss": 1.1794,
"step": 575
},
{
"epoch": 0.3832641328648994,
"grad_norm": 7.037300109863281,
"learning_rate": 9.780000000000001e-06,
"loss": 1.1806,
"step": 600
},
{
"epoch": 0.3992334717342702,
"grad_norm": 8.758708953857422,
"learning_rate": 9.724444444444445e-06,
"loss": 1.0997,
"step": 625
},
{
"epoch": 0.415202810603641,
"grad_norm": 7.583318710327148,
"learning_rate": 9.66888888888889e-06,
"loss": 1.1122,
"step": 650
},
{
"epoch": 0.4311721494730118,
"grad_norm": 7.173635959625244,
"learning_rate": 9.613333333333335e-06,
"loss": 1.1167,
"step": 675
},
{
"epoch": 0.4471414883423826,
"grad_norm": 6.702845096588135,
"learning_rate": 9.557777777777777e-06,
"loss": 1.0873,
"step": 700
},
{
"epoch": 0.46311082721175345,
"grad_norm": 7.966395854949951,
"learning_rate": 9.502222222222223e-06,
"loss": 1.0764,
"step": 725
},
{
"epoch": 0.47908016608112425,
"grad_norm": 7.768787860870361,
"learning_rate": 9.446666666666667e-06,
"loss": 1.0985,
"step": 750
},
{
"epoch": 0.49504950495049505,
"grad_norm": 8.754780769348145,
"learning_rate": 9.391111111111111e-06,
"loss": 1.0646,
"step": 775
},
{
"epoch": 0.5110188438198658,
"grad_norm": 7.156813144683838,
"learning_rate": 9.335555555555557e-06,
"loss": 1.0149,
"step": 800
},
{
"epoch": 0.5269881826892366,
"grad_norm": 6.864979267120361,
"learning_rate": 9.280000000000001e-06,
"loss": 1.0514,
"step": 825
},
{
"epoch": 0.5429575215586074,
"grad_norm": 6.696143627166748,
"learning_rate": 9.224444444444445e-06,
"loss": 1.0376,
"step": 850
},
{
"epoch": 0.5589268604279782,
"grad_norm": 6.3973469734191895,
"learning_rate": 9.168888888888889e-06,
"loss": 0.992,
"step": 875
},
{
"epoch": 0.5748961992973491,
"grad_norm": 6.48210334777832,
"learning_rate": 9.113333333333335e-06,
"loss": 1.0463,
"step": 900
},
{
"epoch": 0.5908655381667199,
"grad_norm": 7.711927890777588,
"learning_rate": 9.057777777777779e-06,
"loss": 1.0023,
"step": 925
},
{
"epoch": 0.6068348770360907,
"grad_norm": 7.2077250480651855,
"learning_rate": 9.002222222222223e-06,
"loss": 1.0283,
"step": 950
},
{
"epoch": 0.6228042159054615,
"grad_norm": 7.416881561279297,
"learning_rate": 8.946666666666669e-06,
"loss": 1.0249,
"step": 975
},
{
"epoch": 0.6387735547748323,
"grad_norm": 6.398010730743408,
"learning_rate": 8.891111111111111e-06,
"loss": 0.9546,
"step": 1000
},
{
"epoch": 0.6387735547748323,
"eval_loss": 0.4855521023273468,
"eval_runtime": 1667.9567,
"eval_samples_per_second": 2.339,
"eval_steps_per_second": 0.146,
"eval_wer": 0.3615418266660186,
"step": 1000
},
{
"epoch": 0.6547428936442031,
"grad_norm": 5.443120956420898,
"learning_rate": 8.835555555555557e-06,
"loss": 1.013,
"step": 1025
},
{
"epoch": 0.6707122325135739,
"grad_norm": 7.361939907073975,
"learning_rate": 8.78e-06,
"loss": 0.946,
"step": 1050
},
{
"epoch": 0.6866815713829447,
"grad_norm": 6.80987024307251,
"learning_rate": 8.724444444444445e-06,
"loss": 0.9945,
"step": 1075
},
{
"epoch": 0.7026509102523155,
"grad_norm": 6.040420055389404,
"learning_rate": 8.66888888888889e-06,
"loss": 1.0078,
"step": 1100
},
{
"epoch": 0.7186202491216863,
"grad_norm": 7.904597282409668,
"learning_rate": 8.613333333333333e-06,
"loss": 0.9707,
"step": 1125
},
{
"epoch": 0.7345895879910572,
"grad_norm": 6.9557204246521,
"learning_rate": 8.557777777777778e-06,
"loss": 0.9712,
"step": 1150
},
{
"epoch": 0.750558926860428,
"grad_norm": 5.4510178565979,
"learning_rate": 8.502222222222223e-06,
"loss": 0.9659,
"step": 1175
},
{
"epoch": 0.7665282657297988,
"grad_norm": 5.72000789642334,
"learning_rate": 8.446666666666668e-06,
"loss": 0.9368,
"step": 1200
},
{
"epoch": 0.7824976045991696,
"grad_norm": 5.816184043884277,
"learning_rate": 8.391111111111112e-06,
"loss": 0.9089,
"step": 1225
},
{
"epoch": 0.7984669434685404,
"grad_norm": 6.033445835113525,
"learning_rate": 8.335555555555556e-06,
"loss": 0.9502,
"step": 1250
},
{
"epoch": 0.8144362823379112,
"grad_norm": 5.273908615112305,
"learning_rate": 8.28e-06,
"loss": 0.9151,
"step": 1275
},
{
"epoch": 0.830405621207282,
"grad_norm": 6.122651100158691,
"learning_rate": 8.224444444444444e-06,
"loss": 0.9517,
"step": 1300
},
{
"epoch": 0.8463749600766528,
"grad_norm": 6.673498630523682,
"learning_rate": 8.16888888888889e-06,
"loss": 1.0318,
"step": 1325
},
{
"epoch": 0.8623442989460236,
"grad_norm": 7.197372913360596,
"learning_rate": 8.113333333333334e-06,
"loss": 0.937,
"step": 1350
},
{
"epoch": 0.8783136378153944,
"grad_norm": 6.718713283538818,
"learning_rate": 8.057777777777778e-06,
"loss": 0.9325,
"step": 1375
},
{
"epoch": 0.8942829766847652,
"grad_norm": 7.090008735656738,
"learning_rate": 8.002222222222222e-06,
"loss": 0.9044,
"step": 1400
},
{
"epoch": 0.9102523155541361,
"grad_norm": 8.890588760375977,
"learning_rate": 7.946666666666666e-06,
"loss": 0.8935,
"step": 1425
},
{
"epoch": 0.9262216544235069,
"grad_norm": 5.521778583526611,
"learning_rate": 7.891111111111112e-06,
"loss": 0.8857,
"step": 1450
},
{
"epoch": 0.9421909932928777,
"grad_norm": 6.829771041870117,
"learning_rate": 7.835555555555556e-06,
"loss": 0.9411,
"step": 1475
},
{
"epoch": 0.9581603321622485,
"grad_norm": 6.48971700668335,
"learning_rate": 7.78e-06,
"loss": 0.8616,
"step": 1500
},
{
"epoch": 0.9741296710316193,
"grad_norm": 5.809508323669434,
"learning_rate": 7.724444444444446e-06,
"loss": 0.8976,
"step": 1525
},
{
"epoch": 0.9900990099009901,
"grad_norm": 6.918030738830566,
"learning_rate": 7.66888888888889e-06,
"loss": 0.9213,
"step": 1550
},
{
"epoch": 1.006068348770361,
"grad_norm": 5.847545623779297,
"learning_rate": 7.613333333333334e-06,
"loss": 0.813,
"step": 1575
},
{
"epoch": 1.0220376876397317,
"grad_norm": 5.331517219543457,
"learning_rate": 7.557777777777779e-06,
"loss": 0.6469,
"step": 1600
},
{
"epoch": 1.0380070265091026,
"grad_norm": 7.251556396484375,
"learning_rate": 7.502222222222223e-06,
"loss": 0.646,
"step": 1625
},
{
"epoch": 1.0539763653784733,
"grad_norm": 5.755091667175293,
"learning_rate": 7.446666666666668e-06,
"loss": 0.6496,
"step": 1650
},
{
"epoch": 1.0699457042478442,
"grad_norm": 5.709854602813721,
"learning_rate": 7.3911111111111125e-06,
"loss": 0.6459,
"step": 1675
},
{
"epoch": 1.0859150431172149,
"grad_norm": 5.866008281707764,
"learning_rate": 7.335555555555556e-06,
"loss": 0.6629,
"step": 1700
},
{
"epoch": 1.1018843819865858,
"grad_norm": 5.575136184692383,
"learning_rate": 7.280000000000001e-06,
"loss": 0.6724,
"step": 1725
},
{
"epoch": 1.1178537208559565,
"grad_norm": 5.722553730010986,
"learning_rate": 7.224444444444445e-06,
"loss": 0.6946,
"step": 1750
},
{
"epoch": 1.1338230597253274,
"grad_norm": 5.85053825378418,
"learning_rate": 7.1688888888888895e-06,
"loss": 0.6676,
"step": 1775
},
{
"epoch": 1.1497923985946983,
"grad_norm": 6.432461261749268,
"learning_rate": 7.113333333333334e-06,
"loss": 0.6381,
"step": 1800
},
{
"epoch": 1.165761737464069,
"grad_norm": 6.030885696411133,
"learning_rate": 7.057777777777778e-06,
"loss": 0.683,
"step": 1825
},
{
"epoch": 1.1817310763334399,
"grad_norm": 5.215100288391113,
"learning_rate": 7.0022222222222225e-06,
"loss": 0.6623,
"step": 1850
},
{
"epoch": 1.1977004152028106,
"grad_norm": 5.460089206695557,
"learning_rate": 6.946666666666667e-06,
"loss": 0.6519,
"step": 1875
},
{
"epoch": 1.2136697540721815,
"grad_norm": 5.882613182067871,
"learning_rate": 6.891111111111111e-06,
"loss": 0.6788,
"step": 1900
},
{
"epoch": 1.2296390929415522,
"grad_norm": 5.0330915451049805,
"learning_rate": 6.835555555555556e-06,
"loss": 0.6654,
"step": 1925
},
{
"epoch": 1.245608431810923,
"grad_norm": 4.624293804168701,
"learning_rate": 6.780000000000001e-06,
"loss": 0.6394,
"step": 1950
},
{
"epoch": 1.2615777706802938,
"grad_norm": 5.252276420593262,
"learning_rate": 6.724444444444444e-06,
"loss": 0.6522,
"step": 1975
},
{
"epoch": 1.2775471095496647,
"grad_norm": 5.393728733062744,
"learning_rate": 6.668888888888889e-06,
"loss": 0.6673,
"step": 2000
},
{
"epoch": 1.2775471095496647,
"eval_loss": 0.41851192712783813,
"eval_runtime": 1656.9911,
"eval_samples_per_second": 2.354,
"eval_steps_per_second": 0.147,
"eval_wer": 0.31497593933796725,
"step": 2000
},
{
"epoch": 1.2935164484190356,
"grad_norm": 7.740047454833984,
"learning_rate": 6.613333333333334e-06,
"loss": 0.643,
"step": 2025
},
{
"epoch": 1.3094857872884063,
"grad_norm": 4.754881381988525,
"learning_rate": 6.557777777777778e-06,
"loss": 0.6545,
"step": 2050
},
{
"epoch": 1.325455126157777,
"grad_norm": 5.5493693351745605,
"learning_rate": 6.502222222222223e-06,
"loss": 0.6741,
"step": 2075
},
{
"epoch": 1.3414244650271478,
"grad_norm": 5.743963241577148,
"learning_rate": 6.446666666666668e-06,
"loss": 0.65,
"step": 2100
},
{
"epoch": 1.3573938038965188,
"grad_norm": 6.479578495025635,
"learning_rate": 6.391111111111111e-06,
"loss": 0.6345,
"step": 2125
},
{
"epoch": 1.3733631427658894,
"grad_norm": 5.8687286376953125,
"learning_rate": 6.335555555555556e-06,
"loss": 0.6612,
"step": 2150
},
{
"epoch": 1.3893324816352604,
"grad_norm": 4.9471893310546875,
"learning_rate": 6.280000000000001e-06,
"loss": 0.6616,
"step": 2175
},
{
"epoch": 1.405301820504631,
"grad_norm": 4.968109130859375,
"learning_rate": 6.224444444444445e-06,
"loss": 0.6405,
"step": 2200
},
{
"epoch": 1.421271159374002,
"grad_norm": 6.063320159912109,
"learning_rate": 6.16888888888889e-06,
"loss": 0.6615,
"step": 2225
},
{
"epoch": 1.4372404982433729,
"grad_norm": 5.78763484954834,
"learning_rate": 6.113333333333333e-06,
"loss": 0.6308,
"step": 2250
},
{
"epoch": 1.4532098371127435,
"grad_norm": 5.714231014251709,
"learning_rate": 6.057777777777778e-06,
"loss": 0.6619,
"step": 2275
},
{
"epoch": 1.4691791759821142,
"grad_norm": 7.075171947479248,
"learning_rate": 6.002222222222223e-06,
"loss": 0.6748,
"step": 2300
},
{
"epoch": 1.4851485148514851,
"grad_norm": 6.3147969245910645,
"learning_rate": 5.946666666666668e-06,
"loss": 0.6441,
"step": 2325
},
{
"epoch": 1.501117853720856,
"grad_norm": 6.079730033874512,
"learning_rate": 5.891111111111112e-06,
"loss": 0.6303,
"step": 2350
},
{
"epoch": 1.5170871925902267,
"grad_norm": 5.132657527923584,
"learning_rate": 5.8355555555555565e-06,
"loss": 0.6718,
"step": 2375
},
{
"epoch": 1.5330565314595974,
"grad_norm": 5.566254138946533,
"learning_rate": 5.78e-06,
"loss": 0.6686,
"step": 2400
},
{
"epoch": 1.5490258703289683,
"grad_norm": 6.299161911010742,
"learning_rate": 5.724444444444445e-06,
"loss": 0.6485,
"step": 2425
},
{
"epoch": 1.5649952091983392,
"grad_norm": 5.586335182189941,
"learning_rate": 5.6688888888888895e-06,
"loss": 0.6031,
"step": 2450
},
{
"epoch": 1.5809645480677101,
"grad_norm": 5.396686553955078,
"learning_rate": 5.613333333333334e-06,
"loss": 0.6407,
"step": 2475
},
{
"epoch": 1.5969338869370808,
"grad_norm": 6.483650207519531,
"learning_rate": 5.557777777777778e-06,
"loss": 0.6703,
"step": 2500
},
{
"epoch": 1.6129032258064515,
"grad_norm": 5.946717739105225,
"learning_rate": 5.5022222222222224e-06,
"loss": 0.6079,
"step": 2525
},
{
"epoch": 1.6288725646758224,
"grad_norm": 5.817110061645508,
"learning_rate": 5.4466666666666665e-06,
"loss": 0.6275,
"step": 2550
},
{
"epoch": 1.6448419035451933,
"grad_norm": 5.888713836669922,
"learning_rate": 5.391111111111111e-06,
"loss": 0.64,
"step": 2575
},
{
"epoch": 1.660811242414564,
"grad_norm": 5.296940803527832,
"learning_rate": 5.335555555555556e-06,
"loss": 0.6278,
"step": 2600
},
{
"epoch": 1.6767805812839347,
"grad_norm": 5.4542388916015625,
"learning_rate": 5.28e-06,
"loss": 0.6236,
"step": 2625
},
{
"epoch": 1.6927499201533056,
"grad_norm": 5.980891227722168,
"learning_rate": 5.224444444444445e-06,
"loss": 0.6607,
"step": 2650
},
{
"epoch": 1.7087192590226765,
"grad_norm": 7.3297014236450195,
"learning_rate": 5.168888888888889e-06,
"loss": 0.673,
"step": 2675
},
{
"epoch": 1.7246885978920474,
"grad_norm": 4.739117622375488,
"learning_rate": 5.113333333333333e-06,
"loss": 0.5893,
"step": 2700
},
{
"epoch": 1.7406579367614181,
"grad_norm": 4.754851818084717,
"learning_rate": 5.057777777777778e-06,
"loss": 0.6459,
"step": 2725
},
{
"epoch": 1.7566272756307888,
"grad_norm": 4.72427225112915,
"learning_rate": 5.002222222222223e-06,
"loss": 0.6093,
"step": 2750
},
{
"epoch": 1.7725966145001597,
"grad_norm": 5.871730327606201,
"learning_rate": 4.946666666666667e-06,
"loss": 0.6088,
"step": 2775
},
{
"epoch": 1.7885659533695306,
"grad_norm": 5.640221118927002,
"learning_rate": 4.891111111111111e-06,
"loss": 0.6214,
"step": 2800
},
{
"epoch": 1.8045352922389013,
"grad_norm": 5.749293327331543,
"learning_rate": 4.835555555555556e-06,
"loss": 0.6105,
"step": 2825
},
{
"epoch": 1.820504631108272,
"grad_norm": 6.414817810058594,
"learning_rate": 4.78e-06,
"loss": 0.6523,
"step": 2850
},
{
"epoch": 1.836473969977643,
"grad_norm": 5.655066967010498,
"learning_rate": 4.724444444444445e-06,
"loss": 0.6348,
"step": 2875
},
{
"epoch": 1.8524433088470138,
"grad_norm": 5.395172119140625,
"learning_rate": 4.66888888888889e-06,
"loss": 0.634,
"step": 2900
},
{
"epoch": 1.8684126477163845,
"grad_norm": 4.894425868988037,
"learning_rate": 4.613333333333334e-06,
"loss": 0.595,
"step": 2925
},
{
"epoch": 1.8843819865857554,
"grad_norm": 5.686845779418945,
"learning_rate": 4.557777777777778e-06,
"loss": 0.6113,
"step": 2950
},
{
"epoch": 1.900351325455126,
"grad_norm": 5.444372177124023,
"learning_rate": 4.502222222222223e-06,
"loss": 0.6277,
"step": 2975
},
{
"epoch": 1.916320664324497,
"grad_norm": 5.224650859832764,
"learning_rate": 4.446666666666667e-06,
"loss": 0.6601,
"step": 3000
},
{
"epoch": 1.916320664324497,
"eval_loss": 0.389792263507843,
"eval_runtime": 1666.4367,
"eval_samples_per_second": 2.341,
"eval_steps_per_second": 0.146,
"eval_wer": 0.2912798327905507,
"step": 3000
},
{
"epoch": 1.932290003193868,
"grad_norm": 4.736299514770508,
"learning_rate": 4.391111111111112e-06,
"loss": 0.6351,
"step": 3025
},
{
"epoch": 1.9482593420632386,
"grad_norm": 4.797982692718506,
"learning_rate": 4.3355555555555565e-06,
"loss": 0.6017,
"step": 3050
},
{
"epoch": 1.9642286809326093,
"grad_norm": 5.4142022132873535,
"learning_rate": 4.2800000000000005e-06,
"loss": 0.6124,
"step": 3075
},
{
"epoch": 1.9801980198019802,
"grad_norm": 5.374582767486572,
"learning_rate": 4.2244444444444446e-06,
"loss": 0.6243,
"step": 3100
},
{
"epoch": 1.996167358671351,
"grad_norm": 4.480958938598633,
"learning_rate": 4.168888888888889e-06,
"loss": 0.5981,
"step": 3125
},
{
"epoch": 2.012136697540722,
"grad_norm": 5.1775736808776855,
"learning_rate": 4.1133333333333335e-06,
"loss": 0.4607,
"step": 3150
},
{
"epoch": 2.0281060364100925,
"grad_norm": 4.18350887298584,
"learning_rate": 4.057777777777778e-06,
"loss": 0.3907,
"step": 3175
},
{
"epoch": 2.0440753752794634,
"grad_norm": 4.902673721313477,
"learning_rate": 4.002222222222222e-06,
"loss": 0.4004,
"step": 3200
},
{
"epoch": 2.0600447141488343,
"grad_norm": 4.528185844421387,
"learning_rate": 3.946666666666667e-06,
"loss": 0.3936,
"step": 3225
},
{
"epoch": 2.076014053018205,
"grad_norm": 5.0421061515808105,
"learning_rate": 3.891111111111111e-06,
"loss": 0.4242,
"step": 3250
},
{
"epoch": 2.0919833918875756,
"grad_norm": 4.861361026763916,
"learning_rate": 3.835555555555555e-06,
"loss": 0.3916,
"step": 3275
},
{
"epoch": 2.1079527307569466,
"grad_norm": 6.367924213409424,
"learning_rate": 3.7800000000000002e-06,
"loss": 0.413,
"step": 3300
},
{
"epoch": 2.1239220696263175,
"grad_norm": 3.6156017780303955,
"learning_rate": 3.724444444444445e-06,
"loss": 0.4103,
"step": 3325
},
{
"epoch": 2.1398914084956884,
"grad_norm": 5.132371425628662,
"learning_rate": 3.668888888888889e-06,
"loss": 0.407,
"step": 3350
},
{
"epoch": 2.1558607473650593,
"grad_norm": 4.446967601776123,
"learning_rate": 3.6133333333333336e-06,
"loss": 0.4082,
"step": 3375
},
{
"epoch": 2.1718300862344297,
"grad_norm": 5.18086576461792,
"learning_rate": 3.5577777777777785e-06,
"loss": 0.4053,
"step": 3400
},
{
"epoch": 2.1877994251038007,
"grad_norm": 4.955840587615967,
"learning_rate": 3.5022222222222225e-06,
"loss": 0.3995,
"step": 3425
},
{
"epoch": 2.2037687639731716,
"grad_norm": 4.59506368637085,
"learning_rate": 3.446666666666667e-06,
"loss": 0.4064,
"step": 3450
},
{
"epoch": 2.2197381028425425,
"grad_norm": 5.527169227600098,
"learning_rate": 3.391111111111111e-06,
"loss": 0.3946,
"step": 3475
},
{
"epoch": 2.235707441711913,
"grad_norm": 5.034458637237549,
"learning_rate": 3.335555555555556e-06,
"loss": 0.4088,
"step": 3500
},
{
"epoch": 2.251676780581284,
"grad_norm": 4.814046382904053,
"learning_rate": 3.2800000000000004e-06,
"loss": 0.4032,
"step": 3525
},
{
"epoch": 2.2676461194506548,
"grad_norm": 5.115776062011719,
"learning_rate": 3.2244444444444444e-06,
"loss": 0.4009,
"step": 3550
},
{
"epoch": 2.2836154583200257,
"grad_norm": 4.4629130363464355,
"learning_rate": 3.1688888888888893e-06,
"loss": 0.408,
"step": 3575
},
{
"epoch": 2.2995847971893966,
"grad_norm": 4.914193153381348,
"learning_rate": 3.1133333333333337e-06,
"loss": 0.3867,
"step": 3600
},
{
"epoch": 2.315554136058767,
"grad_norm": 4.358288288116455,
"learning_rate": 3.0577777777777778e-06,
"loss": 0.4485,
"step": 3625
},
{
"epoch": 2.331523474928138,
"grad_norm": 4.8634161949157715,
"learning_rate": 3.0022222222222227e-06,
"loss": 0.389,
"step": 3650
},
{
"epoch": 2.347492813797509,
"grad_norm": 5.283102989196777,
"learning_rate": 2.946666666666667e-06,
"loss": 0.395,
"step": 3675
},
{
"epoch": 2.3634621526668798,
"grad_norm": 4.393892765045166,
"learning_rate": 2.891111111111111e-06,
"loss": 0.4047,
"step": 3700
},
{
"epoch": 2.37943149153625,
"grad_norm": 5.246931552886963,
"learning_rate": 2.835555555555556e-06,
"loss": 0.3957,
"step": 3725
},
{
"epoch": 2.395400830405621,
"grad_norm": 4.599190711975098,
"learning_rate": 2.7800000000000005e-06,
"loss": 0.3829,
"step": 3750
},
{
"epoch": 2.411370169274992,
"grad_norm": 5.136366367340088,
"learning_rate": 2.7244444444444445e-06,
"loss": 0.4029,
"step": 3775
},
{
"epoch": 2.427339508144363,
"grad_norm": 4.448164463043213,
"learning_rate": 2.6688888888888894e-06,
"loss": 0.3869,
"step": 3800
},
{
"epoch": 2.443308847013734,
"grad_norm": 5.434977054595947,
"learning_rate": 2.6133333333333334e-06,
"loss": 0.3871,
"step": 3825
},
{
"epoch": 2.4592781858831043,
"grad_norm": 5.272222518920898,
"learning_rate": 2.557777777777778e-06,
"loss": 0.388,
"step": 3850
},
{
"epoch": 2.4752475247524752,
"grad_norm": 5.163560390472412,
"learning_rate": 2.5022222222222224e-06,
"loss": 0.4156,
"step": 3875
},
{
"epoch": 2.491216863621846,
"grad_norm": 4.091150283813477,
"learning_rate": 2.446666666666667e-06,
"loss": 0.4093,
"step": 3900
},
{
"epoch": 2.5071862024912166,
"grad_norm": 4.858378887176514,
"learning_rate": 2.3911111111111113e-06,
"loss": 0.4068,
"step": 3925
},
{
"epoch": 2.5231555413605875,
"grad_norm": 5.535120487213135,
"learning_rate": 2.3355555555555557e-06,
"loss": 0.3992,
"step": 3950
},
{
"epoch": 2.5391248802299584,
"grad_norm": 4.70937442779541,
"learning_rate": 2.28e-06,
"loss": 0.4009,
"step": 3975
},
{
"epoch": 2.5550942190993293,
"grad_norm": 5.273413181304932,
"learning_rate": 2.2244444444444447e-06,
"loss": 0.3887,
"step": 4000
},
{
"epoch": 2.5550942190993293,
"eval_loss": 0.3953465223312378,
"eval_runtime": 1634.3629,
"eval_samples_per_second": 2.387,
"eval_steps_per_second": 0.149,
"eval_wer": 0.2815826568803772,
"step": 4000
},
{
"epoch": 2.5710635579687002,
"grad_norm": 5.2189483642578125,
"learning_rate": 2.168888888888889e-06,
"loss": 0.3789,
"step": 4025
},
{
"epoch": 2.587032896838071,
"grad_norm": 5.335792064666748,
"learning_rate": 2.1133333333333336e-06,
"loss": 0.3911,
"step": 4050
},
{
"epoch": 2.6030022357074416,
"grad_norm": 4.469089508056641,
"learning_rate": 2.057777777777778e-06,
"loss": 0.388,
"step": 4075
},
{
"epoch": 2.6189715745768125,
"grad_norm": 4.454526424407959,
"learning_rate": 2.0022222222222225e-06,
"loss": 0.3829,
"step": 4100
},
{
"epoch": 2.6349409134461834,
"grad_norm": 5.536740303039551,
"learning_rate": 1.9466666666666665e-06,
"loss": 0.3846,
"step": 4125
},
{
"epoch": 2.650910252315554,
"grad_norm": 4.877845764160156,
"learning_rate": 1.8911111111111114e-06,
"loss": 0.4116,
"step": 4150
},
{
"epoch": 2.666879591184925,
"grad_norm": 5.3322014808654785,
"learning_rate": 1.8355555555555557e-06,
"loss": 0.3857,
"step": 4175
},
{
"epoch": 2.6828489300542957,
"grad_norm": 4.9132866859436035,
"learning_rate": 1.7800000000000001e-06,
"loss": 0.3922,
"step": 4200
},
{
"epoch": 2.6988182689236666,
"grad_norm": 5.380650043487549,
"learning_rate": 1.7244444444444448e-06,
"loss": 0.3757,
"step": 4225
},
{
"epoch": 2.7147876077930375,
"grad_norm": 4.5387282371521,
"learning_rate": 1.668888888888889e-06,
"loss": 0.3919,
"step": 4250
},
{
"epoch": 2.7307569466624084,
"grad_norm": 5.215519428253174,
"learning_rate": 1.6133333333333335e-06,
"loss": 0.4104,
"step": 4275
},
{
"epoch": 2.746726285531779,
"grad_norm": 6.126536846160889,
"learning_rate": 1.5577777777777777e-06,
"loss": 0.3792,
"step": 4300
},
{
"epoch": 2.76269562440115,
"grad_norm": 5.707647323608398,
"learning_rate": 1.5022222222222224e-06,
"loss": 0.4041,
"step": 4325
},
{
"epoch": 2.7786649632705207,
"grad_norm": 5.024244785308838,
"learning_rate": 1.4466666666666669e-06,
"loss": 0.4045,
"step": 4350
},
{
"epoch": 2.794634302139891,
"grad_norm": 5.240878105163574,
"learning_rate": 1.3911111111111111e-06,
"loss": 0.4022,
"step": 4375
},
{
"epoch": 2.810603641009262,
"grad_norm": 5.824533939361572,
"learning_rate": 1.3355555555555558e-06,
"loss": 0.3947,
"step": 4400
},
{
"epoch": 2.826572979878633,
"grad_norm": 4.228843688964844,
"learning_rate": 1.28e-06,
"loss": 0.3725,
"step": 4425
},
{
"epoch": 2.842542318748004,
"grad_norm": 4.704908847808838,
"learning_rate": 1.2244444444444445e-06,
"loss": 0.4001,
"step": 4450
},
{
"epoch": 2.858511657617375,
"grad_norm": 4.397124767303467,
"learning_rate": 1.168888888888889e-06,
"loss": 0.381,
"step": 4475
},
{
"epoch": 2.8744809964867457,
"grad_norm": 4.7757062911987305,
"learning_rate": 1.1133333333333334e-06,
"loss": 0.3689,
"step": 4500
},
{
"epoch": 2.890450335356116,
"grad_norm": 4.855478763580322,
"learning_rate": 1.0577777777777779e-06,
"loss": 0.373,
"step": 4525
},
{
"epoch": 2.906419674225487,
"grad_norm": 5.458982467651367,
"learning_rate": 1.0022222222222223e-06,
"loss": 0.3919,
"step": 4550
},
{
"epoch": 2.922389013094858,
"grad_norm": 3.9116828441619873,
"learning_rate": 9.466666666666667e-07,
"loss": 0.4143,
"step": 4575
},
{
"epoch": 2.9383583519642285,
"grad_norm": 4.8182878494262695,
"learning_rate": 8.911111111111112e-07,
"loss": 0.3726,
"step": 4600
},
{
"epoch": 2.9543276908335994,
"grad_norm": 5.018452167510986,
"learning_rate": 8.355555555555556e-07,
"loss": 0.4018,
"step": 4625
},
{
"epoch": 2.9702970297029703,
"grad_norm": 5.315720081329346,
"learning_rate": 7.8e-07,
"loss": 0.3648,
"step": 4650
},
{
"epoch": 2.986266368572341,
"grad_norm": 4.65333890914917,
"learning_rate": 7.266666666666668e-07,
"loss": 0.3865,
"step": 4675
},
{
"epoch": 3.002235707441712,
"grad_norm": 2.9384920597076416,
"learning_rate": 6.711111111111111e-07,
"loss": 0.3793,
"step": 4700
},
{
"epoch": 3.0182050463110826,
"grad_norm": 4.572001934051514,
"learning_rate": 6.155555555555556e-07,
"loss": 0.2557,
"step": 4725
},
{
"epoch": 3.0341743851804535,
"grad_norm": 3.237912654876709,
"learning_rate": 5.6e-07,
"loss": 0.252,
"step": 4750
},
{
"epoch": 3.0501437240498244,
"grad_norm": 3.8456506729125977,
"learning_rate": 5.044444444444445e-07,
"loss": 0.2556,
"step": 4775
},
{
"epoch": 3.0661130629191953,
"grad_norm": 4.627549171447754,
"learning_rate": 4.488888888888889e-07,
"loss": 0.2618,
"step": 4800
},
{
"epoch": 3.0820824017885657,
"grad_norm": 4.322127342224121,
"learning_rate": 3.9333333333333336e-07,
"loss": 0.2715,
"step": 4825
},
{
"epoch": 3.0980517406579366,
"grad_norm": 4.883068084716797,
"learning_rate": 3.3777777777777777e-07,
"loss": 0.2585,
"step": 4850
},
{
"epoch": 3.1140210795273076,
"grad_norm": 4.687808513641357,
"learning_rate": 2.822222222222222e-07,
"loss": 0.2593,
"step": 4875
},
{
"epoch": 3.1299904183966785,
"grad_norm": 4.111570835113525,
"learning_rate": 2.266666666666667e-07,
"loss": 0.2645,
"step": 4900
},
{
"epoch": 3.1459597572660494,
"grad_norm": 4.777975082397461,
"learning_rate": 1.7111111111111114e-07,
"loss": 0.2594,
"step": 4925
},
{
"epoch": 3.16192909613542,
"grad_norm": 4.395523548126221,
"learning_rate": 1.1555555555555556e-07,
"loss": 0.2665,
"step": 4950
},
{
"epoch": 3.1778984350047907,
"grad_norm": 5.332032203674316,
"learning_rate": 6.000000000000001e-08,
"loss": 0.2772,
"step": 4975
},
{
"epoch": 3.1938677738741617,
"grad_norm": 4.1832733154296875,
"learning_rate": 4.444444444444445e-09,
"loss": 0.2637,
"step": 5000
},
{
"epoch": 3.1938677738741617,
"eval_loss": 0.4074931740760803,
"eval_runtime": 1670.4337,
"eval_samples_per_second": 2.335,
"eval_steps_per_second": 0.146,
"eval_wer": 0.2823846789481359,
"step": 5000
},
{
"epoch": 3.1938677738741617,
"step": 5000,
"total_flos": 5.435895365546803e+20,
"train_loss": 0.7105956988334656,
"train_runtime": 59674.9515,
"train_samples_per_second": 2.681,
"train_steps_per_second": 0.084
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.435895365546803e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}