End of training

Browse files

Files changed (6) hide show

README.md +1 -1
all_results.json +15 -0
eval_results.json +9 -0
runs/Nov04_13-33-34_b8b767113626/events.out.tfevents.1730790046.b8b767113626.478.1 +3 -0
train_results.json +9 -0
trainer_state.json +1487 -0

README.md CHANGED Viewed

@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # whisper-large-v3-ft-btbn-ca
-This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.4075
 - Wer: 0.2824

 # whisper-large-v3-ft-btbn-ca
+This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the DewiBrynJones/banc-trawsgrifiadau-bangor train main, cymen-arfor/15awr train+dev+test main dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.4075
 - Wer: 0.2824

all_results.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+    "epoch": 3.1938677738741617,
+    "eval_loss": 0.4074931740760803,
+    "eval_runtime": 1638.5657,
+    "eval_samples": 3901,
+    "eval_samples_per_second": 2.381,
+    "eval_steps_per_second": 0.149,
+    "eval_wer": 0.2823846789481359,
+    "total_flos": 5.435895365546803e+20,
+    "train_loss": 0.7105956988334656,
+    "train_runtime": 59674.9515,
+    "train_samples": 50095,
+    "train_samples_per_second": 2.681,
+    "train_steps_per_second": 0.084
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.1938677738741617,
+    "eval_loss": 0.4074931740760803,
+    "eval_runtime": 1638.5657,
+    "eval_samples": 3901,
+    "eval_samples_per_second": 2.381,
+    "eval_steps_per_second": 0.149,
+    "eval_wer": 0.2823846789481359
+}

runs/Nov04_13-33-34_b8b767113626/events.out.tfevents.1730790046.b8b767113626.478.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b7168eff838fb3a36589d7f20fc726d82a301aeb640085d791f4d58b6535f765
+size 406

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.1938677738741617,
+    "total_flos": 5.435895365546803e+20,
+    "train_loss": 0.7105956988334656,
+    "train_runtime": 59674.9515,
+    "train_samples": 50095,
+    "train_samples_per_second": 2.681,
+    "train_steps_per_second": 0.084
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1487 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.1938677738741617,
+  "eval_steps": 1000,
+  "global_step": 5000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.015969338869370808,
+      "grad_norm": 17.92350196838379,
+      "learning_rate": 4.800000000000001e-07,
+      "loss": 3.223,
+      "step": 25
+    },
+    {
+      "epoch": 0.031938677738741615,
+      "grad_norm": 11.101584434509277,
+      "learning_rate": 9.800000000000001e-07,
+      "loss": 2.5321,
+      "step": 50
+    },
+    {
+      "epoch": 0.047908016608112426,
+      "grad_norm": 9.936028480529785,
+      "learning_rate": 1.48e-06,
+      "loss": 2.0032,
+      "step": 75
+    },
+    {
+      "epoch": 0.06387735547748323,
+      "grad_norm": 10.770648956298828,
+      "learning_rate": 1.98e-06,
+      "loss": 1.6134,
+      "step": 100
+    },
+    {
+      "epoch": 0.07984669434685404,
+      "grad_norm": 10.68463134765625,
+      "learning_rate": 2.4800000000000004e-06,
+      "loss": 1.5694,
+      "step": 125
+    },
+    {
+      "epoch": 0.09581603321622485,
+      "grad_norm": 10.635124206542969,
+      "learning_rate": 2.9800000000000003e-06,
+      "loss": 1.4934,
+      "step": 150
+    },
+    {
+      "epoch": 0.11178537208559565,
+      "grad_norm": 9.225995063781738,
+      "learning_rate": 3.48e-06,
+      "loss": 1.518,
+      "step": 175
+    },
+    {
+      "epoch": 0.12775471095496646,
+      "grad_norm": 9.05696964263916,
+      "learning_rate": 3.980000000000001e-06,
+      "loss": 1.4258,
+      "step": 200
+    },
+    {
+      "epoch": 0.14372404982433729,
+      "grad_norm": 9.599803924560547,
+      "learning_rate": 4.48e-06,
+      "loss": 1.422,
+      "step": 225
+    },
+    {
+      "epoch": 0.15969338869370808,
+      "grad_norm": 9.442488670349121,
+      "learning_rate": 4.980000000000001e-06,
+      "loss": 1.3208,
+      "step": 250
+    },
+    {
+      "epoch": 0.17566272756307888,
+      "grad_norm": 10.746581077575684,
+      "learning_rate": 5.480000000000001e-06,
+      "loss": 1.3997,
+      "step": 275
+    },
+    {
+      "epoch": 0.1916320664324497,
+      "grad_norm": 9.297645568847656,
+      "learning_rate": 5.98e-06,
+      "loss": 1.3343,
+      "step": 300
+    },
+    {
+      "epoch": 0.2076014053018205,
+      "grad_norm": 10.026762962341309,
+      "learning_rate": 6.480000000000001e-06,
+      "loss": 1.3353,
+      "step": 325
+    },
+    {
+      "epoch": 0.2235707441711913,
+      "grad_norm": 9.392842292785645,
+      "learning_rate": 6.98e-06,
+      "loss": 1.3451,
+      "step": 350
+    },
+    {
+      "epoch": 0.23954008304056212,
+      "grad_norm": 7.376865386962891,
+      "learning_rate": 7.48e-06,
+      "loss": 1.2645,
+      "step": 375
+    },
+    {
+      "epoch": 0.2555094219099329,
+      "grad_norm": 10.001627922058105,
+      "learning_rate": 7.980000000000002e-06,
+      "loss": 1.271,
+      "step": 400
+    },
+    {
+      "epoch": 0.2714787607793037,
+      "grad_norm": 7.699875354766846,
+      "learning_rate": 8.48e-06,
+      "loss": 1.2116,
+      "step": 425
+    },
+    {
+      "epoch": 0.28744809964867457,
+      "grad_norm": 8.733208656311035,
+      "learning_rate": 8.98e-06,
+      "loss": 1.2222,
+      "step": 450
+    },
+    {
+      "epoch": 0.30341743851804537,
+      "grad_norm": 8.286600112915039,
+      "learning_rate": 9.48e-06,
+      "loss": 1.2134,
+      "step": 475
+    },
+    {
+      "epoch": 0.31938677738741617,
+      "grad_norm": 7.408142566680908,
+      "learning_rate": 9.980000000000001e-06,
+      "loss": 1.1662,
+      "step": 500
+    },
+    {
+      "epoch": 0.33535611625678696,
+      "grad_norm": 9.274364471435547,
+      "learning_rate": 9.946666666666667e-06,
+      "loss": 1.2752,
+      "step": 525
+    },
+    {
+      "epoch": 0.35132545512615776,
+      "grad_norm": 8.4090576171875,
+      "learning_rate": 9.891111111111113e-06,
+      "loss": 1.2137,
+      "step": 550
+    },
+    {
+      "epoch": 0.3672947939955286,
+      "grad_norm": 8.74751091003418,
+      "learning_rate": 9.835555555555556e-06,
+      "loss": 1.1794,
+      "step": 575
+    },
+    {
+      "epoch": 0.3832641328648994,
+      "grad_norm": 7.037300109863281,
+      "learning_rate": 9.780000000000001e-06,
+      "loss": 1.1806,
+      "step": 600
+    },
+    {
+      "epoch": 0.3992334717342702,
+      "grad_norm": 8.758708953857422,
+      "learning_rate": 9.724444444444445e-06,
+      "loss": 1.0997,
+      "step": 625
+    },
+    {
+      "epoch": 0.415202810603641,
+      "grad_norm": 7.583318710327148,
+      "learning_rate": 9.66888888888889e-06,
+      "loss": 1.1122,
+      "step": 650
+    },
+    {
+      "epoch": 0.4311721494730118,
+      "grad_norm": 7.173635959625244,
+      "learning_rate": 9.613333333333335e-06,
+      "loss": 1.1167,
+      "step": 675
+    },
+    {
+      "epoch": 0.4471414883423826,
+      "grad_norm": 6.702845096588135,
+      "learning_rate": 9.557777777777777e-06,
+      "loss": 1.0873,
+      "step": 700
+    },
+    {
+      "epoch": 0.46311082721175345,
+      "grad_norm": 7.966395854949951,
+      "learning_rate": 9.502222222222223e-06,
+      "loss": 1.0764,
+      "step": 725
+    },
+    {
+      "epoch": 0.47908016608112425,
+      "grad_norm": 7.768787860870361,
+      "learning_rate": 9.446666666666667e-06,
+      "loss": 1.0985,
+      "step": 750
+    },
+    {
+      "epoch": 0.49504950495049505,
+      "grad_norm": 8.754780769348145,
+      "learning_rate": 9.391111111111111e-06,
+      "loss": 1.0646,
+      "step": 775
+    },
+    {
+      "epoch": 0.5110188438198658,
+      "grad_norm": 7.156813144683838,
+      "learning_rate": 9.335555555555557e-06,
+      "loss": 1.0149,
+      "step": 800
+    },
+    {
+      "epoch": 0.5269881826892366,
+      "grad_norm": 6.864979267120361,
+      "learning_rate": 9.280000000000001e-06,
+      "loss": 1.0514,
+      "step": 825
+    },
+    {
+      "epoch": 0.5429575215586074,
+      "grad_norm": 6.696143627166748,
+      "learning_rate": 9.224444444444445e-06,
+      "loss": 1.0376,
+      "step": 850
+    },
+    {
+      "epoch": 0.5589268604279782,
+      "grad_norm": 6.3973469734191895,
+      "learning_rate": 9.168888888888889e-06,
+      "loss": 0.992,
+      "step": 875
+    },
+    {
+      "epoch": 0.5748961992973491,
+      "grad_norm": 6.48210334777832,
+      "learning_rate": 9.113333333333335e-06,
+      "loss": 1.0463,
+      "step": 900
+    },
+    {
+      "epoch": 0.5908655381667199,
+      "grad_norm": 7.711927890777588,
+      "learning_rate": 9.057777777777779e-06,
+      "loss": 1.0023,
+      "step": 925
+    },
+    {
+      "epoch": 0.6068348770360907,
+      "grad_norm": 7.2077250480651855,
+      "learning_rate": 9.002222222222223e-06,
+      "loss": 1.0283,
+      "step": 950
+    },
+    {
+      "epoch": 0.6228042159054615,
+      "grad_norm": 7.416881561279297,
+      "learning_rate": 8.946666666666669e-06,
+      "loss": 1.0249,
+      "step": 975
+    },
+    {
+      "epoch": 0.6387735547748323,
+      "grad_norm": 6.398010730743408,
+      "learning_rate": 8.891111111111111e-06,
+      "loss": 0.9546,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6387735547748323,
+      "eval_loss": 0.4855521023273468,
+      "eval_runtime": 1667.9567,
+      "eval_samples_per_second": 2.339,
+      "eval_steps_per_second": 0.146,
+      "eval_wer": 0.3615418266660186,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6547428936442031,
+      "grad_norm": 5.443120956420898,
+      "learning_rate": 8.835555555555557e-06,
+      "loss": 1.013,
+      "step": 1025
+    },
+    {
+      "epoch": 0.6707122325135739,
+      "grad_norm": 7.361939907073975,
+      "learning_rate": 8.78e-06,
+      "loss": 0.946,
+      "step": 1050
+    },
+    {
+      "epoch": 0.6866815713829447,
+      "grad_norm": 6.80987024307251,
+      "learning_rate": 8.724444444444445e-06,
+      "loss": 0.9945,
+      "step": 1075
+    },
+    {
+      "epoch": 0.7026509102523155,
+      "grad_norm": 6.040420055389404,
+      "learning_rate": 8.66888888888889e-06,
+      "loss": 1.0078,
+      "step": 1100
+    },
+    {
+      "epoch": 0.7186202491216863,
+      "grad_norm": 7.904597282409668,
+      "learning_rate": 8.613333333333333e-06,
+      "loss": 0.9707,
+      "step": 1125
+    },
+    {
+      "epoch": 0.7345895879910572,
+      "grad_norm": 6.9557204246521,
+      "learning_rate": 8.557777777777778e-06,
+      "loss": 0.9712,
+      "step": 1150
+    },
+    {
+      "epoch": 0.750558926860428,
+      "grad_norm": 5.4510178565979,
+      "learning_rate": 8.502222222222223e-06,
+      "loss": 0.9659,
+      "step": 1175
+    },
+    {
+      "epoch": 0.7665282657297988,
+      "grad_norm": 5.72000789642334,
+      "learning_rate": 8.446666666666668e-06,
+      "loss": 0.9368,
+      "step": 1200
+    },
+    {
+      "epoch": 0.7824976045991696,
+      "grad_norm": 5.816184043884277,
+      "learning_rate": 8.391111111111112e-06,
+      "loss": 0.9089,
+      "step": 1225
+    },
+    {
+      "epoch": 0.7984669434685404,
+      "grad_norm": 6.033445835113525,
+      "learning_rate": 8.335555555555556e-06,
+      "loss": 0.9502,
+      "step": 1250
+    },
+    {
+      "epoch": 0.8144362823379112,
+      "grad_norm": 5.273908615112305,
+      "learning_rate": 8.28e-06,
+      "loss": 0.9151,
+      "step": 1275
+    },
+    {
+      "epoch": 0.830405621207282,
+      "grad_norm": 6.122651100158691,
+      "learning_rate": 8.224444444444444e-06,
+      "loss": 0.9517,
+      "step": 1300
+    },
+    {
+      "epoch": 0.8463749600766528,
+      "grad_norm": 6.673498630523682,
+      "learning_rate": 8.16888888888889e-06,
+      "loss": 1.0318,
+      "step": 1325
+    },
+    {
+      "epoch": 0.8623442989460236,
+      "grad_norm": 7.197372913360596,
+      "learning_rate": 8.113333333333334e-06,
+      "loss": 0.937,
+      "step": 1350
+    },
+    {
+      "epoch": 0.8783136378153944,
+      "grad_norm": 6.718713283538818,
+      "learning_rate": 8.057777777777778e-06,
+      "loss": 0.9325,
+      "step": 1375
+    },
+    {
+      "epoch": 0.8942829766847652,
+      "grad_norm": 7.090008735656738,
+      "learning_rate": 8.002222222222222e-06,
+      "loss": 0.9044,
+      "step": 1400
+    },
+    {
+      "epoch": 0.9102523155541361,
+      "grad_norm": 8.890588760375977,
+      "learning_rate": 7.946666666666666e-06,
+      "loss": 0.8935,
+      "step": 1425
+    },
+    {
+      "epoch": 0.9262216544235069,
+      "grad_norm": 5.521778583526611,
+      "learning_rate": 7.891111111111112e-06,
+      "loss": 0.8857,
+      "step": 1450
+    },
+    {
+      "epoch": 0.9421909932928777,
+      "grad_norm": 6.829771041870117,
+      "learning_rate": 7.835555555555556e-06,
+      "loss": 0.9411,
+      "step": 1475
+    },
+    {
+      "epoch": 0.9581603321622485,
+      "grad_norm": 6.48971700668335,
+      "learning_rate": 7.78e-06,
+      "loss": 0.8616,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9741296710316193,
+      "grad_norm": 5.809508323669434,
+      "learning_rate": 7.724444444444446e-06,
+      "loss": 0.8976,
+      "step": 1525
+    },
+    {
+      "epoch": 0.9900990099009901,
+      "grad_norm": 6.918030738830566,
+      "learning_rate": 7.66888888888889e-06,
+      "loss": 0.9213,
+      "step": 1550
+    },
+    {
+      "epoch": 1.006068348770361,
+      "grad_norm": 5.847545623779297,
+      "learning_rate": 7.613333333333334e-06,
+      "loss": 0.813,
+      "step": 1575
+    },
+    {
+      "epoch": 1.0220376876397317,
+      "grad_norm": 5.331517219543457,
+      "learning_rate": 7.557777777777779e-06,
+      "loss": 0.6469,
+      "step": 1600
+    },
+    {
+      "epoch": 1.0380070265091026,
+      "grad_norm": 7.251556396484375,
+      "learning_rate": 7.502222222222223e-06,
+      "loss": 0.646,
+      "step": 1625
+    },
+    {
+      "epoch": 1.0539763653784733,
+      "grad_norm": 5.755091667175293,
+      "learning_rate": 7.446666666666668e-06,
+      "loss": 0.6496,
+      "step": 1650
+    },
+    {
+      "epoch": 1.0699457042478442,
+      "grad_norm": 5.709854602813721,
+      "learning_rate": 7.3911111111111125e-06,
+      "loss": 0.6459,
+      "step": 1675
+    },
+    {
+      "epoch": 1.0859150431172149,
+      "grad_norm": 5.866008281707764,
+      "learning_rate": 7.335555555555556e-06,
+      "loss": 0.6629,
+      "step": 1700
+    },
+    {
+      "epoch": 1.1018843819865858,
+      "grad_norm": 5.575136184692383,
+      "learning_rate": 7.280000000000001e-06,
+      "loss": 0.6724,
+      "step": 1725
+    },
+    {
+      "epoch": 1.1178537208559565,
+      "grad_norm": 5.722553730010986,
+      "learning_rate": 7.224444444444445e-06,
+      "loss": 0.6946,
+      "step": 1750
+    },
+    {
+      "epoch": 1.1338230597253274,
+      "grad_norm": 5.85053825378418,
+      "learning_rate": 7.1688888888888895e-06,
+      "loss": 0.6676,
+      "step": 1775
+    },
+    {
+      "epoch": 1.1497923985946983,
+      "grad_norm": 6.432461261749268,
+      "learning_rate": 7.113333333333334e-06,
+      "loss": 0.6381,
+      "step": 1800
+    },
+    {
+      "epoch": 1.165761737464069,
+      "grad_norm": 6.030885696411133,
+      "learning_rate": 7.057777777777778e-06,
+      "loss": 0.683,
+      "step": 1825
+    },
+    {
+      "epoch": 1.1817310763334399,
+      "grad_norm": 5.215100288391113,
+      "learning_rate": 7.0022222222222225e-06,
+      "loss": 0.6623,
+      "step": 1850
+    },
+    {
+      "epoch": 1.1977004152028106,
+      "grad_norm": 5.460089206695557,
+      "learning_rate": 6.946666666666667e-06,
+      "loss": 0.6519,
+      "step": 1875
+    },
+    {
+      "epoch": 1.2136697540721815,
+      "grad_norm": 5.882613182067871,
+      "learning_rate": 6.891111111111111e-06,
+      "loss": 0.6788,
+      "step": 1900
+    },
+    {
+      "epoch": 1.2296390929415522,
+      "grad_norm": 5.0330915451049805,
+      "learning_rate": 6.835555555555556e-06,
+      "loss": 0.6654,
+      "step": 1925
+    },
+    {
+      "epoch": 1.245608431810923,
+      "grad_norm": 4.624293804168701,
+      "learning_rate": 6.780000000000001e-06,
+      "loss": 0.6394,
+      "step": 1950
+    },
+    {
+      "epoch": 1.2615777706802938,
+      "grad_norm": 5.252276420593262,
+      "learning_rate": 6.724444444444444e-06,
+      "loss": 0.6522,
+      "step": 1975
+    },
+    {
+      "epoch": 1.2775471095496647,
+      "grad_norm": 5.393728733062744,
+      "learning_rate": 6.668888888888889e-06,
+      "loss": 0.6673,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2775471095496647,
+      "eval_loss": 0.41851192712783813,
+      "eval_runtime": 1656.9911,
+      "eval_samples_per_second": 2.354,
+      "eval_steps_per_second": 0.147,
+      "eval_wer": 0.31497593933796725,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2935164484190356,
+      "grad_norm": 7.740047454833984,
+      "learning_rate": 6.613333333333334e-06,
+      "loss": 0.643,
+      "step": 2025
+    },
+    {
+      "epoch": 1.3094857872884063,
+      "grad_norm": 4.754881381988525,
+      "learning_rate": 6.557777777777778e-06,
+      "loss": 0.6545,
+      "step": 2050
+    },
+    {
+      "epoch": 1.325455126157777,
+      "grad_norm": 5.5493693351745605,
+      "learning_rate": 6.502222222222223e-06,
+      "loss": 0.6741,
+      "step": 2075
+    },
+    {
+      "epoch": 1.3414244650271478,
+      "grad_norm": 5.743963241577148,
+      "learning_rate": 6.446666666666668e-06,
+      "loss": 0.65,
+      "step": 2100
+    },
+    {
+      "epoch": 1.3573938038965188,
+      "grad_norm": 6.479578495025635,
+      "learning_rate": 6.391111111111111e-06,
+      "loss": 0.6345,
+      "step": 2125
+    },
+    {
+      "epoch": 1.3733631427658894,
+      "grad_norm": 5.8687286376953125,
+      "learning_rate": 6.335555555555556e-06,
+      "loss": 0.6612,
+      "step": 2150
+    },
+    {
+      "epoch": 1.3893324816352604,
+      "grad_norm": 4.9471893310546875,
+      "learning_rate": 6.280000000000001e-06,
+      "loss": 0.6616,
+      "step": 2175
+    },
+    {
+      "epoch": 1.405301820504631,
+      "grad_norm": 4.968109130859375,
+      "learning_rate": 6.224444444444445e-06,
+      "loss": 0.6405,
+      "step": 2200
+    },
+    {
+      "epoch": 1.421271159374002,
+      "grad_norm": 6.063320159912109,
+      "learning_rate": 6.16888888888889e-06,
+      "loss": 0.6615,
+      "step": 2225
+    },
+    {
+      "epoch": 1.4372404982433729,
+      "grad_norm": 5.78763484954834,
+      "learning_rate": 6.113333333333333e-06,
+      "loss": 0.6308,
+      "step": 2250
+    },
+    {
+      "epoch": 1.4532098371127435,
+      "grad_norm": 5.714231014251709,
+      "learning_rate": 6.057777777777778e-06,
+      "loss": 0.6619,
+      "step": 2275
+    },
+    {
+      "epoch": 1.4691791759821142,
+      "grad_norm": 7.075171947479248,
+      "learning_rate": 6.002222222222223e-06,
+      "loss": 0.6748,
+      "step": 2300
+    },
+    {
+      "epoch": 1.4851485148514851,
+      "grad_norm": 6.3147969245910645,
+      "learning_rate": 5.946666666666668e-06,
+      "loss": 0.6441,
+      "step": 2325
+    },
+    {
+      "epoch": 1.501117853720856,
+      "grad_norm": 6.079730033874512,
+      "learning_rate": 5.891111111111112e-06,
+      "loss": 0.6303,
+      "step": 2350
+    },
+    {
+      "epoch": 1.5170871925902267,
+      "grad_norm": 5.132657527923584,
+      "learning_rate": 5.8355555555555565e-06,
+      "loss": 0.6718,
+      "step": 2375
+    },
+    {
+      "epoch": 1.5330565314595974,
+      "grad_norm": 5.566254138946533,
+      "learning_rate": 5.78e-06,
+      "loss": 0.6686,
+      "step": 2400
+    },
+    {
+      "epoch": 1.5490258703289683,
+      "grad_norm": 6.299161911010742,
+      "learning_rate": 5.724444444444445e-06,
+      "loss": 0.6485,
+      "step": 2425
+    },
+    {
+      "epoch": 1.5649952091983392,
+      "grad_norm": 5.586335182189941,
+      "learning_rate": 5.6688888888888895e-06,
+      "loss": 0.6031,
+      "step": 2450
+    },
+    {
+      "epoch": 1.5809645480677101,
+      "grad_norm": 5.396686553955078,
+      "learning_rate": 5.613333333333334e-06,
+      "loss": 0.6407,
+      "step": 2475
+    },
+    {
+      "epoch": 1.5969338869370808,
+      "grad_norm": 6.483650207519531,
+      "learning_rate": 5.557777777777778e-06,
+      "loss": 0.6703,
+      "step": 2500
+    },
+    {
+      "epoch": 1.6129032258064515,
+      "grad_norm": 5.946717739105225,
+      "learning_rate": 5.5022222222222224e-06,
+      "loss": 0.6079,
+      "step": 2525
+    },
+    {
+      "epoch": 1.6288725646758224,
+      "grad_norm": 5.817110061645508,
+      "learning_rate": 5.4466666666666665e-06,
+      "loss": 0.6275,
+      "step": 2550
+    },
+    {
+      "epoch": 1.6448419035451933,
+      "grad_norm": 5.888713836669922,
+      "learning_rate": 5.391111111111111e-06,
+      "loss": 0.64,
+      "step": 2575
+    },
+    {
+      "epoch": 1.660811242414564,
+      "grad_norm": 5.296940803527832,
+      "learning_rate": 5.335555555555556e-06,
+      "loss": 0.6278,
+      "step": 2600
+    },
+    {
+      "epoch": 1.6767805812839347,
+      "grad_norm": 5.4542388916015625,
+      "learning_rate": 5.28e-06,
+      "loss": 0.6236,
+      "step": 2625
+    },
+    {
+      "epoch": 1.6927499201533056,
+      "grad_norm": 5.980891227722168,
+      "learning_rate": 5.224444444444445e-06,
+      "loss": 0.6607,
+      "step": 2650
+    },
+    {
+      "epoch": 1.7087192590226765,
+      "grad_norm": 7.3297014236450195,
+      "learning_rate": 5.168888888888889e-06,
+      "loss": 0.673,
+      "step": 2675
+    },
+    {
+      "epoch": 1.7246885978920474,
+      "grad_norm": 4.739117622375488,
+      "learning_rate": 5.113333333333333e-06,
+      "loss": 0.5893,
+      "step": 2700
+    },
+    {
+      "epoch": 1.7406579367614181,
+      "grad_norm": 4.754851818084717,
+      "learning_rate": 5.057777777777778e-06,
+      "loss": 0.6459,
+      "step": 2725
+    },
+    {
+      "epoch": 1.7566272756307888,
+      "grad_norm": 4.72427225112915,
+      "learning_rate": 5.002222222222223e-06,
+      "loss": 0.6093,
+      "step": 2750
+    },
+    {
+      "epoch": 1.7725966145001597,
+      "grad_norm": 5.871730327606201,
+      "learning_rate": 4.946666666666667e-06,
+      "loss": 0.6088,
+      "step": 2775
+    },
+    {
+      "epoch": 1.7885659533695306,
+      "grad_norm": 5.640221118927002,
+      "learning_rate": 4.891111111111111e-06,
+      "loss": 0.6214,
+      "step": 2800
+    },
+    {
+      "epoch": 1.8045352922389013,
+      "grad_norm": 5.749293327331543,
+      "learning_rate": 4.835555555555556e-06,
+      "loss": 0.6105,
+      "step": 2825
+    },
+    {
+      "epoch": 1.820504631108272,
+      "grad_norm": 6.414817810058594,
+      "learning_rate": 4.78e-06,
+      "loss": 0.6523,
+      "step": 2850
+    },
+    {
+      "epoch": 1.836473969977643,
+      "grad_norm": 5.655066967010498,
+      "learning_rate": 4.724444444444445e-06,
+      "loss": 0.6348,
+      "step": 2875
+    },
+    {
+      "epoch": 1.8524433088470138,
+      "grad_norm": 5.395172119140625,
+      "learning_rate": 4.66888888888889e-06,
+      "loss": 0.634,
+      "step": 2900
+    },
+    {
+      "epoch": 1.8684126477163845,
+      "grad_norm": 4.894425868988037,
+      "learning_rate": 4.613333333333334e-06,
+      "loss": 0.595,
+      "step": 2925
+    },
+    {
+      "epoch": 1.8843819865857554,
+      "grad_norm": 5.686845779418945,
+      "learning_rate": 4.557777777777778e-06,
+      "loss": 0.6113,
+      "step": 2950
+    },
+    {
+      "epoch": 1.900351325455126,
+      "grad_norm": 5.444372177124023,
+      "learning_rate": 4.502222222222223e-06,
+      "loss": 0.6277,
+      "step": 2975
+    },
+    {
+      "epoch": 1.916320664324497,
+      "grad_norm": 5.224650859832764,
+      "learning_rate": 4.446666666666667e-06,
+      "loss": 0.6601,
+      "step": 3000
+    },
+    {
+      "epoch": 1.916320664324497,
+      "eval_loss": 0.389792263507843,
+      "eval_runtime": 1666.4367,
+      "eval_samples_per_second": 2.341,
+      "eval_steps_per_second": 0.146,
+      "eval_wer": 0.2912798327905507,
+      "step": 3000
+    },
+    {
+      "epoch": 1.932290003193868,
+      "grad_norm": 4.736299514770508,
+      "learning_rate": 4.391111111111112e-06,
+      "loss": 0.6351,
+      "step": 3025
+    },
+    {
+      "epoch": 1.9482593420632386,
+      "grad_norm": 4.797982692718506,
+      "learning_rate": 4.3355555555555565e-06,
+      "loss": 0.6017,
+      "step": 3050
+    },
+    {
+      "epoch": 1.9642286809326093,
+      "grad_norm": 5.4142022132873535,
+      "learning_rate": 4.2800000000000005e-06,
+      "loss": 0.6124,
+      "step": 3075
+    },
+    {
+      "epoch": 1.9801980198019802,
+      "grad_norm": 5.374582767486572,
+      "learning_rate": 4.2244444444444446e-06,
+      "loss": 0.6243,
+      "step": 3100
+    },
+    {
+      "epoch": 1.996167358671351,
+      "grad_norm": 4.480958938598633,
+      "learning_rate": 4.168888888888889e-06,
+      "loss": 0.5981,
+      "step": 3125
+    },
+    {
+      "epoch": 2.012136697540722,
+      "grad_norm": 5.1775736808776855,
+      "learning_rate": 4.1133333333333335e-06,
+      "loss": 0.4607,
+      "step": 3150
+    },
+    {
+      "epoch": 2.0281060364100925,
+      "grad_norm": 4.18350887298584,
+      "learning_rate": 4.057777777777778e-06,
+      "loss": 0.3907,
+      "step": 3175
+    },
+    {
+      "epoch": 2.0440753752794634,
+      "grad_norm": 4.902673721313477,
+      "learning_rate": 4.002222222222222e-06,
+      "loss": 0.4004,
+      "step": 3200
+    },
+    {
+      "epoch": 2.0600447141488343,
+      "grad_norm": 4.528185844421387,
+      "learning_rate": 3.946666666666667e-06,
+      "loss": 0.3936,
+      "step": 3225
+    },
+    {
+      "epoch": 2.076014053018205,
+      "grad_norm": 5.0421061515808105,
+      "learning_rate": 3.891111111111111e-06,
+      "loss": 0.4242,
+      "step": 3250
+    },
+    {
+      "epoch": 2.0919833918875756,
+      "grad_norm": 4.861361026763916,
+      "learning_rate": 3.835555555555555e-06,
+      "loss": 0.3916,
+      "step": 3275
+    },
+    {
+      "epoch": 2.1079527307569466,
+      "grad_norm": 6.367924213409424,
+      "learning_rate": 3.7800000000000002e-06,
+      "loss": 0.413,
+      "step": 3300
+    },
+    {
+      "epoch": 2.1239220696263175,
+      "grad_norm": 3.6156017780303955,
+      "learning_rate": 3.724444444444445e-06,
+      "loss": 0.4103,
+      "step": 3325
+    },
+    {
+      "epoch": 2.1398914084956884,
+      "grad_norm": 5.132371425628662,
+      "learning_rate": 3.668888888888889e-06,
+      "loss": 0.407,
+      "step": 3350
+    },
+    {
+      "epoch": 2.1558607473650593,
+      "grad_norm": 4.446967601776123,
+      "learning_rate": 3.6133333333333336e-06,
+      "loss": 0.4082,
+      "step": 3375
+    },
+    {
+      "epoch": 2.1718300862344297,
+      "grad_norm": 5.18086576461792,
+      "learning_rate": 3.5577777777777785e-06,
+      "loss": 0.4053,
+      "step": 3400
+    },
+    {
+      "epoch": 2.1877994251038007,
+      "grad_norm": 4.955840587615967,
+      "learning_rate": 3.5022222222222225e-06,
+      "loss": 0.3995,
+      "step": 3425
+    },
+    {
+      "epoch": 2.2037687639731716,
+      "grad_norm": 4.59506368637085,
+      "learning_rate": 3.446666666666667e-06,
+      "loss": 0.4064,
+      "step": 3450
+    },
+    {
+      "epoch": 2.2197381028425425,
+      "grad_norm": 5.527169227600098,
+      "learning_rate": 3.391111111111111e-06,
+      "loss": 0.3946,
+      "step": 3475
+    },
+    {
+      "epoch": 2.235707441711913,
+      "grad_norm": 5.034458637237549,
+      "learning_rate": 3.335555555555556e-06,
+      "loss": 0.4088,
+      "step": 3500
+    },
+    {
+      "epoch": 2.251676780581284,
+      "grad_norm": 4.814046382904053,
+      "learning_rate": 3.2800000000000004e-06,
+      "loss": 0.4032,
+      "step": 3525
+    },
+    {
+      "epoch": 2.2676461194506548,
+      "grad_norm": 5.115776062011719,
+      "learning_rate": 3.2244444444444444e-06,
+      "loss": 0.4009,
+      "step": 3550
+    },
+    {
+      "epoch": 2.2836154583200257,
+      "grad_norm": 4.4629130363464355,
+      "learning_rate": 3.1688888888888893e-06,
+      "loss": 0.408,
+      "step": 3575
+    },
+    {
+      "epoch": 2.2995847971893966,
+      "grad_norm": 4.914193153381348,
+      "learning_rate": 3.1133333333333337e-06,
+      "loss": 0.3867,
+      "step": 3600
+    },
+    {
+      "epoch": 2.315554136058767,
+      "grad_norm": 4.358288288116455,
+      "learning_rate": 3.0577777777777778e-06,
+      "loss": 0.4485,
+      "step": 3625
+    },
+    {
+      "epoch": 2.331523474928138,
+      "grad_norm": 4.8634161949157715,
+      "learning_rate": 3.0022222222222227e-06,
+      "loss": 0.389,
+      "step": 3650
+    },
+    {
+      "epoch": 2.347492813797509,
+      "grad_norm": 5.283102989196777,
+      "learning_rate": 2.946666666666667e-06,
+      "loss": 0.395,
+      "step": 3675
+    },
+    {
+      "epoch": 2.3634621526668798,
+      "grad_norm": 4.393892765045166,
+      "learning_rate": 2.891111111111111e-06,
+      "loss": 0.4047,
+      "step": 3700
+    },
+    {
+      "epoch": 2.37943149153625,
+      "grad_norm": 5.246931552886963,
+      "learning_rate": 2.835555555555556e-06,
+      "loss": 0.3957,
+      "step": 3725
+    },
+    {
+      "epoch": 2.395400830405621,
+      "grad_norm": 4.599190711975098,
+      "learning_rate": 2.7800000000000005e-06,
+      "loss": 0.3829,
+      "step": 3750
+    },
+    {
+      "epoch": 2.411370169274992,
+      "grad_norm": 5.136366367340088,
+      "learning_rate": 2.7244444444444445e-06,
+      "loss": 0.4029,
+      "step": 3775
+    },
+    {
+      "epoch": 2.427339508144363,
+      "grad_norm": 4.448164463043213,
+      "learning_rate": 2.6688888888888894e-06,
+      "loss": 0.3869,
+      "step": 3800
+    },
+    {
+      "epoch": 2.443308847013734,
+      "grad_norm": 5.434977054595947,
+      "learning_rate": 2.6133333333333334e-06,
+      "loss": 0.3871,
+      "step": 3825
+    },
+    {
+      "epoch": 2.4592781858831043,
+      "grad_norm": 5.272222518920898,
+      "learning_rate": 2.557777777777778e-06,
+      "loss": 0.388,
+      "step": 3850
+    },
+    {
+      "epoch": 2.4752475247524752,
+      "grad_norm": 5.163560390472412,
+      "learning_rate": 2.5022222222222224e-06,
+      "loss": 0.4156,
+      "step": 3875
+    },
+    {
+      "epoch": 2.491216863621846,
+      "grad_norm": 4.091150283813477,
+      "learning_rate": 2.446666666666667e-06,
+      "loss": 0.4093,
+      "step": 3900
+    },
+    {
+      "epoch": 2.5071862024912166,
+      "grad_norm": 4.858378887176514,
+      "learning_rate": 2.3911111111111113e-06,
+      "loss": 0.4068,
+      "step": 3925
+    },
+    {
+      "epoch": 2.5231555413605875,
+      "grad_norm": 5.535120487213135,
+      "learning_rate": 2.3355555555555557e-06,
+      "loss": 0.3992,
+      "step": 3950
+    },
+    {
+      "epoch": 2.5391248802299584,
+      "grad_norm": 4.70937442779541,
+      "learning_rate": 2.28e-06,
+      "loss": 0.4009,
+      "step": 3975
+    },
+    {
+      "epoch": 2.5550942190993293,
+      "grad_norm": 5.273413181304932,
+      "learning_rate": 2.2244444444444447e-06,
+      "loss": 0.3887,
+      "step": 4000
+    },
+    {
+      "epoch": 2.5550942190993293,
+      "eval_loss": 0.3953465223312378,
+      "eval_runtime": 1634.3629,
+      "eval_samples_per_second": 2.387,
+      "eval_steps_per_second": 0.149,
+      "eval_wer": 0.2815826568803772,
+      "step": 4000
+    },
+    {
+      "epoch": 2.5710635579687002,
+      "grad_norm": 5.2189483642578125,
+      "learning_rate": 2.168888888888889e-06,
+      "loss": 0.3789,
+      "step": 4025
+    },
+    {
+      "epoch": 2.587032896838071,
+      "grad_norm": 5.335792064666748,
+      "learning_rate": 2.1133333333333336e-06,
+      "loss": 0.3911,
+      "step": 4050
+    },
+    {
+      "epoch": 2.6030022357074416,
+      "grad_norm": 4.469089508056641,
+      "learning_rate": 2.057777777777778e-06,
+      "loss": 0.388,
+      "step": 4075
+    },
+    {
+      "epoch": 2.6189715745768125,
+      "grad_norm": 4.454526424407959,
+      "learning_rate": 2.0022222222222225e-06,
+      "loss": 0.3829,
+      "step": 4100
+    },
+    {
+      "epoch": 2.6349409134461834,
+      "grad_norm": 5.536740303039551,
+      "learning_rate": 1.9466666666666665e-06,
+      "loss": 0.3846,
+      "step": 4125
+    },
+    {
+      "epoch": 2.650910252315554,
+      "grad_norm": 4.877845764160156,
+      "learning_rate": 1.8911111111111114e-06,
+      "loss": 0.4116,
+      "step": 4150
+    },
+    {
+      "epoch": 2.666879591184925,
+      "grad_norm": 5.3322014808654785,
+      "learning_rate": 1.8355555555555557e-06,
+      "loss": 0.3857,
+      "step": 4175
+    },
+    {
+      "epoch": 2.6828489300542957,
+      "grad_norm": 4.9132866859436035,
+      "learning_rate": 1.7800000000000001e-06,
+      "loss": 0.3922,
+      "step": 4200
+    },
+    {
+      "epoch": 2.6988182689236666,
+      "grad_norm": 5.380650043487549,
+      "learning_rate": 1.7244444444444448e-06,
+      "loss": 0.3757,
+      "step": 4225
+    },
+    {
+      "epoch": 2.7147876077930375,
+      "grad_norm": 4.5387282371521,
+      "learning_rate": 1.668888888888889e-06,
+      "loss": 0.3919,
+      "step": 4250
+    },
+    {
+      "epoch": 2.7307569466624084,
+      "grad_norm": 5.215519428253174,
+      "learning_rate": 1.6133333333333335e-06,
+      "loss": 0.4104,
+      "step": 4275
+    },
+    {
+      "epoch": 2.746726285531779,
+      "grad_norm": 6.126536846160889,
+      "learning_rate": 1.5577777777777777e-06,
+      "loss": 0.3792,
+      "step": 4300
+    },
+    {
+      "epoch": 2.76269562440115,
+      "grad_norm": 5.707647323608398,
+      "learning_rate": 1.5022222222222224e-06,
+      "loss": 0.4041,
+      "step": 4325
+    },
+    {
+      "epoch": 2.7786649632705207,
+      "grad_norm": 5.024244785308838,
+      "learning_rate": 1.4466666666666669e-06,
+      "loss": 0.4045,
+      "step": 4350
+    },
+    {
+      "epoch": 2.794634302139891,
+      "grad_norm": 5.240878105163574,
+      "learning_rate": 1.3911111111111111e-06,
+      "loss": 0.4022,
+      "step": 4375
+    },
+    {
+      "epoch": 2.810603641009262,
+      "grad_norm": 5.824533939361572,
+      "learning_rate": 1.3355555555555558e-06,
+      "loss": 0.3947,
+      "step": 4400
+    },
+    {
+      "epoch": 2.826572979878633,
+      "grad_norm": 4.228843688964844,
+      "learning_rate": 1.28e-06,
+      "loss": 0.3725,
+      "step": 4425
+    },
+    {
+      "epoch": 2.842542318748004,
+      "grad_norm": 4.704908847808838,
+      "learning_rate": 1.2244444444444445e-06,
+      "loss": 0.4001,
+      "step": 4450
+    },
+    {
+      "epoch": 2.858511657617375,
+      "grad_norm": 4.397124767303467,
+      "learning_rate": 1.168888888888889e-06,
+      "loss": 0.381,
+      "step": 4475
+    },
+    {
+      "epoch": 2.8744809964867457,
+      "grad_norm": 4.7757062911987305,
+      "learning_rate": 1.1133333333333334e-06,
+      "loss": 0.3689,
+      "step": 4500
+    },
+    {
+      "epoch": 2.890450335356116,
+      "grad_norm": 4.855478763580322,
+      "learning_rate": 1.0577777777777779e-06,
+      "loss": 0.373,
+      "step": 4525
+    },
+    {
+      "epoch": 2.906419674225487,
+      "grad_norm": 5.458982467651367,
+      "learning_rate": 1.0022222222222223e-06,
+      "loss": 0.3919,
+      "step": 4550
+    },
+    {
+      "epoch": 2.922389013094858,
+      "grad_norm": 3.9116828441619873,
+      "learning_rate": 9.466666666666667e-07,
+      "loss": 0.4143,
+      "step": 4575
+    },
+    {
+      "epoch": 2.9383583519642285,
+      "grad_norm": 4.8182878494262695,
+      "learning_rate": 8.911111111111112e-07,
+      "loss": 0.3726,
+      "step": 4600
+    },
+    {
+      "epoch": 2.9543276908335994,
+      "grad_norm": 5.018452167510986,
+      "learning_rate": 8.355555555555556e-07,
+      "loss": 0.4018,
+      "step": 4625
+    },
+    {
+      "epoch": 2.9702970297029703,
+      "grad_norm": 5.315720081329346,
+      "learning_rate": 7.8e-07,
+      "loss": 0.3648,
+      "step": 4650
+    },
+    {
+      "epoch": 2.986266368572341,
+      "grad_norm": 4.65333890914917,
+      "learning_rate": 7.266666666666668e-07,
+      "loss": 0.3865,
+      "step": 4675
+    },
+    {
+      "epoch": 3.002235707441712,
+      "grad_norm": 2.9384920597076416,
+      "learning_rate": 6.711111111111111e-07,
+      "loss": 0.3793,
+      "step": 4700
+    },
+    {
+      "epoch": 3.0182050463110826,
+      "grad_norm": 4.572001934051514,
+      "learning_rate": 6.155555555555556e-07,
+      "loss": 0.2557,
+      "step": 4725
+    },
+    {
+      "epoch": 3.0341743851804535,
+      "grad_norm": 3.237912654876709,
+      "learning_rate": 5.6e-07,
+      "loss": 0.252,
+      "step": 4750
+    },
+    {
+      "epoch": 3.0501437240498244,
+      "grad_norm": 3.8456506729125977,
+      "learning_rate": 5.044444444444445e-07,
+      "loss": 0.2556,
+      "step": 4775
+    },
+    {
+      "epoch": 3.0661130629191953,
+      "grad_norm": 4.627549171447754,
+      "learning_rate": 4.488888888888889e-07,
+      "loss": 0.2618,
+      "step": 4800
+    },
+    {
+      "epoch": 3.0820824017885657,
+      "grad_norm": 4.322127342224121,
+      "learning_rate": 3.9333333333333336e-07,
+      "loss": 0.2715,
+      "step": 4825
+    },
+    {
+      "epoch": 3.0980517406579366,
+      "grad_norm": 4.883068084716797,
+      "learning_rate": 3.3777777777777777e-07,
+      "loss": 0.2585,
+      "step": 4850
+    },
+    {
+      "epoch": 3.1140210795273076,
+      "grad_norm": 4.687808513641357,
+      "learning_rate": 2.822222222222222e-07,
+      "loss": 0.2593,
+      "step": 4875
+    },
+    {
+      "epoch": 3.1299904183966785,
+      "grad_norm": 4.111570835113525,
+      "learning_rate": 2.266666666666667e-07,
+      "loss": 0.2645,
+      "step": 4900
+    },
+    {
+      "epoch": 3.1459597572660494,
+      "grad_norm": 4.777975082397461,
+      "learning_rate": 1.7111111111111114e-07,
+      "loss": 0.2594,
+      "step": 4925
+    },
+    {
+      "epoch": 3.16192909613542,
+      "grad_norm": 4.395523548126221,
+      "learning_rate": 1.1555555555555556e-07,
+      "loss": 0.2665,
+      "step": 4950
+    },
+    {
+      "epoch": 3.1778984350047907,
+      "grad_norm": 5.332032203674316,
+      "learning_rate": 6.000000000000001e-08,
+      "loss": 0.2772,
+      "step": 4975
+    },
+    {
+      "epoch": 3.1938677738741617,
+      "grad_norm": 4.1832733154296875,
+      "learning_rate": 4.444444444444445e-09,
+      "loss": 0.2637,
+      "step": 5000
+    },
+    {
+      "epoch": 3.1938677738741617,
+      "eval_loss": 0.4074931740760803,
+      "eval_runtime": 1670.4337,
+      "eval_samples_per_second": 2.335,
+      "eval_steps_per_second": 0.146,
+      "eval_wer": 0.2823846789481359,
+      "step": 5000
+    },
+    {
+      "epoch": 3.1938677738741617,
+      "step": 5000,
+      "total_flos": 5.435895365546803e+20,
+      "train_loss": 0.7105956988334656,
+      "train_runtime": 59674.9515,
+      "train_samples_per_second": 2.681,
+      "train_steps_per_second": 0.084
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 5000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.435895365546803e+20,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}