End of training

Browse files

Files changed (6) hide show

README.md +1 -1
all_results.json +15 -0
eval_results.json +9 -0
runs/Oct29_13-03-52_443023c04f1e/events.out.tfevents.1730266679.443023c04f1e.36.1 +3 -0
train_results.json +9 -0
trainer_state.json +1487 -0

README.md CHANGED Viewed

@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 # whisper-large-v3-ft-btb-ca-cy
-This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.4037
 - Wer: 0.2791

 # whisper-large-v3-ft-btb-ca-cy
+This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the DewiBrynJones/banc-trawsgrifiadau-bangor-clean train main, cymen-arfor/15awr train+dev+test main dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.4037
 - Wer: 0.2791

all_results.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+    "epoch": 3.1938677738741617,
+    "eval_loss": 0.40369176864624023,
+    "eval_runtime": 1635.5488,
+    "eval_samples": 3901,
+    "eval_samples_per_second": 2.385,
+    "eval_steps_per_second": 0.149,
+    "eval_wer": 0.27909784603496574,
+    "total_flos": 5.435895365546803e+20,
+    "train_loss": 0.3537434986591339,
+    "train_runtime": 57744.1557,
+    "train_samples": 50095,
+    "train_samples_per_second": 2.771,
+    "train_steps_per_second": 0.087
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.1938677738741617,
+    "eval_loss": 0.40369176864624023,
+    "eval_runtime": 1635.5488,
+    "eval_samples": 3901,
+    "eval_samples_per_second": 2.385,
+    "eval_steps_per_second": 0.149,
+    "eval_wer": 0.27909784603496574
+}

runs/Oct29_13-03-52_443023c04f1e/events.out.tfevents.1730266679.443023c04f1e.36.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7e733abdd8dffbbbf172dd82e60ec570d7c09c5639c4c1d7551b2971f92437b
+size 406

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.1938677738741617,
+    "total_flos": 5.435895365546803e+20,
+    "train_loss": 0.3537434986591339,
+    "train_runtime": 57744.1557,
+    "train_samples": 50095,
+    "train_samples_per_second": 2.771,
+    "train_steps_per_second": 0.087
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1487 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.1938677738741617,
+  "eval_steps": 1000,
+  "global_step": 5000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.015969338869370808,
+      "grad_norm": 9.112746238708496,
+      "learning_rate": 5.000000000000001e-07,
+      "loss": 1.6703,
+      "step": 25
+    },
+    {
+      "epoch": 0.031938677738741615,
+      "grad_norm": 5.510941505432129,
+      "learning_rate": 1.0000000000000002e-06,
+      "loss": 1.2963,
+      "step": 50
+    },
+    {
+      "epoch": 0.047908016608112426,
+      "grad_norm": 5.158988952636719,
+      "learning_rate": 1.5e-06,
+      "loss": 1.0164,
+      "step": 75
+    },
+    {
+      "epoch": 0.06387735547748323,
+      "grad_norm": 5.4585957527160645,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 0.8123,
+      "step": 100
+    },
+    {
+      "epoch": 0.07984669434685404,
+      "grad_norm": 5.480216979980469,
+      "learning_rate": 2.5e-06,
+      "loss": 0.7918,
+      "step": 125
+    },
+    {
+      "epoch": 0.09581603321622485,
+      "grad_norm": 5.493738651275635,
+      "learning_rate": 3e-06,
+      "loss": 0.7452,
+      "step": 150
+    },
+    {
+      "epoch": 0.11178537208559565,
+      "grad_norm": 4.575730800628662,
+      "learning_rate": 3.5e-06,
+      "loss": 0.7583,
+      "step": 175
+    },
+    {
+      "epoch": 0.12775471095496646,
+      "grad_norm": 4.527775764465332,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 0.7108,
+      "step": 200
+    },
+    {
+      "epoch": 0.14372404982433729,
+      "grad_norm": 4.807627201080322,
+      "learning_rate": 4.5e-06,
+      "loss": 0.7146,
+      "step": 225
+    },
+    {
+      "epoch": 0.15969338869370808,
+      "grad_norm": 4.733044147491455,
+      "learning_rate": 5e-06,
+      "loss": 0.659,
+      "step": 250
+    },
+    {
+      "epoch": 0.17566272756307888,
+      "grad_norm": 5.310563564300537,
+      "learning_rate": 5.500000000000001e-06,
+      "loss": 0.7,
+      "step": 275
+    },
+    {
+      "epoch": 0.1916320664324497,
+      "grad_norm": 4.584811687469482,
+      "learning_rate": 6e-06,
+      "loss": 0.6643,
+      "step": 300
+    },
+    {
+      "epoch": 0.2076014053018205,
+      "grad_norm": 5.011808395385742,
+      "learning_rate": 6.5000000000000004e-06,
+      "loss": 0.6696,
+      "step": 325
+    },
+    {
+      "epoch": 0.2235707441711913,
+      "grad_norm": 4.633641719818115,
+      "learning_rate": 7e-06,
+      "loss": 0.6692,
+      "step": 350
+    },
+    {
+      "epoch": 0.23954008304056212,
+      "grad_norm": 3.665330648422241,
+      "learning_rate": 7.500000000000001e-06,
+      "loss": 0.6285,
+      "step": 375
+    },
+    {
+      "epoch": 0.2555094219099329,
+      "grad_norm": 5.0646185874938965,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 0.6313,
+      "step": 400
+    },
+    {
+      "epoch": 0.2714787607793037,
+      "grad_norm": 3.9207818508148193,
+      "learning_rate": 8.5e-06,
+      "loss": 0.6045,
+      "step": 425
+    },
+    {
+      "epoch": 0.28744809964867457,
+      "grad_norm": 4.316348552703857,
+      "learning_rate": 9e-06,
+      "loss": 0.6044,
+      "step": 450
+    },
+    {
+      "epoch": 0.30341743851804537,
+      "grad_norm": 3.9778010845184326,
+      "learning_rate": 9.5e-06,
+      "loss": 0.6015,
+      "step": 475
+    },
+    {
+      "epoch": 0.31938677738741617,
+      "grad_norm": 3.8584372997283936,
+      "learning_rate": 1e-05,
+      "loss": 0.5793,
+      "step": 500
+    },
+    {
+      "epoch": 0.33535611625678696,
+      "grad_norm": 4.777307033538818,
+      "learning_rate": 9.944444444444445e-06,
+      "loss": 0.6289,
+      "step": 525
+    },
+    {
+      "epoch": 0.35132545512615776,
+      "grad_norm": 4.359010219573975,
+      "learning_rate": 9.88888888888889e-06,
+      "loss": 0.6047,
+      "step": 550
+    },
+    {
+      "epoch": 0.3672947939955286,
+      "grad_norm": 4.476827621459961,
+      "learning_rate": 9.833333333333333e-06,
+      "loss": 0.5868,
+      "step": 575
+    },
+    {
+      "epoch": 0.3832641328648994,
+      "grad_norm": 3.478997230529785,
+      "learning_rate": 9.777777777777779e-06,
+      "loss": 0.5877,
+      "step": 600
+    },
+    {
+      "epoch": 0.3992334717342702,
+      "grad_norm": 4.191431522369385,
+      "learning_rate": 9.722222222222223e-06,
+      "loss": 0.547,
+      "step": 625
+    },
+    {
+      "epoch": 0.415202810603641,
+      "grad_norm": 3.8496177196502686,
+      "learning_rate": 9.666666666666667e-06,
+      "loss": 0.5511,
+      "step": 650
+    },
+    {
+      "epoch": 0.4311721494730118,
+      "grad_norm": 3.78070330619812,
+      "learning_rate": 9.611111111111112e-06,
+      "loss": 0.5503,
+      "step": 675
+    },
+    {
+      "epoch": 0.4471414883423826,
+      "grad_norm": 3.223071813583374,
+      "learning_rate": 9.555555555555556e-06,
+      "loss": 0.5418,
+      "step": 700
+    },
+    {
+      "epoch": 0.46311082721175345,
+      "grad_norm": 4.323662281036377,
+      "learning_rate": 9.5e-06,
+      "loss": 0.5362,
+      "step": 725
+    },
+    {
+      "epoch": 0.47908016608112425,
+      "grad_norm": 3.7754769325256348,
+      "learning_rate": 9.444444444444445e-06,
+      "loss": 0.5478,
+      "step": 750
+    },
+    {
+      "epoch": 0.49504950495049505,
+      "grad_norm": 4.361028671264648,
+      "learning_rate": 9.38888888888889e-06,
+      "loss": 0.5257,
+      "step": 775
+    },
+    {
+      "epoch": 0.5110188438198658,
+      "grad_norm": 3.5216853618621826,
+      "learning_rate": 9.333333333333334e-06,
+      "loss": 0.5046,
+      "step": 800
+    },
+    {
+      "epoch": 0.5269881826892366,
+      "grad_norm": 3.39201021194458,
+      "learning_rate": 9.277777777777778e-06,
+      "loss": 0.5209,
+      "step": 825
+    },
+    {
+      "epoch": 0.5429575215586074,
+      "grad_norm": 3.3704864978790283,
+      "learning_rate": 9.222222222222224e-06,
+      "loss": 0.5128,
+      "step": 850
+    },
+    {
+      "epoch": 0.5589268604279782,
+      "grad_norm": 3.4954028129577637,
+      "learning_rate": 9.166666666666666e-06,
+      "loss": 0.4954,
+      "step": 875
+    },
+    {
+      "epoch": 0.5748961992973491,
+      "grad_norm": 3.267204523086548,
+      "learning_rate": 9.111111111111112e-06,
+      "loss": 0.5204,
+      "step": 900
+    },
+    {
+      "epoch": 0.5908655381667199,
+      "grad_norm": 3.802452802658081,
+      "learning_rate": 9.055555555555556e-06,
+      "loss": 0.4988,
+      "step": 925
+    },
+    {
+      "epoch": 0.6068348770360907,
+      "grad_norm": 3.5180504322052,
+      "learning_rate": 9e-06,
+      "loss": 0.5068,
+      "step": 950
+    },
+    {
+      "epoch": 0.6228042159054615,
+      "grad_norm": 3.5314347743988037,
+      "learning_rate": 8.944444444444446e-06,
+      "loss": 0.5039,
+      "step": 975
+    },
+    {
+      "epoch": 0.6387735547748323,
+      "grad_norm": 3.1311240196228027,
+      "learning_rate": 8.888888888888888e-06,
+      "loss": 0.4739,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6387735547748323,
+      "eval_loss": 0.4840342402458191,
+      "eval_runtime": 1688.2428,
+      "eval_samples_per_second": 2.311,
+      "eval_steps_per_second": 0.145,
+      "eval_wer": 0.3517579445571332,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6547428936442031,
+      "grad_norm": 2.715566396713257,
+      "learning_rate": 8.833333333333334e-06,
+      "loss": 0.502,
+      "step": 1025
+    },
+    {
+      "epoch": 0.6707122325135739,
+      "grad_norm": 3.692412853240967,
+      "learning_rate": 8.777777777777778e-06,
+      "loss": 0.4733,
+      "step": 1050
+    },
+    {
+      "epoch": 0.6866815713829447,
+      "grad_norm": 3.414935350418091,
+      "learning_rate": 8.722222222222224e-06,
+      "loss": 0.4945,
+      "step": 1075
+    },
+    {
+      "epoch": 0.7026509102523155,
+      "grad_norm": 3.058530569076538,
+      "learning_rate": 8.666666666666668e-06,
+      "loss": 0.5033,
+      "step": 1100
+    },
+    {
+      "epoch": 0.7186202491216863,
+      "grad_norm": 3.8033292293548584,
+      "learning_rate": 8.611111111111112e-06,
+      "loss": 0.483,
+      "step": 1125
+    },
+    {
+      "epoch": 0.7345895879910572,
+      "grad_norm": 3.238865852355957,
+      "learning_rate": 8.555555555555556e-06,
+      "loss": 0.4809,
+      "step": 1150
+    },
+    {
+      "epoch": 0.750558926860428,
+      "grad_norm": 2.665280342102051,
+      "learning_rate": 8.5e-06,
+      "loss": 0.4792,
+      "step": 1175
+    },
+    {
+      "epoch": 0.7665282657297988,
+      "grad_norm": 2.9003748893737793,
+      "learning_rate": 8.444444444444446e-06,
+      "loss": 0.4662,
+      "step": 1200
+    },
+    {
+      "epoch": 0.7824976045991696,
+      "grad_norm": 2.7720658779144287,
+      "learning_rate": 8.38888888888889e-06,
+      "loss": 0.4532,
+      "step": 1225
+    },
+    {
+      "epoch": 0.7984669434685404,
+      "grad_norm": 3.0096704959869385,
+      "learning_rate": 8.333333333333334e-06,
+      "loss": 0.4719,
+      "step": 1250
+    },
+    {
+      "epoch": 0.8144362823379112,
+      "grad_norm": 3.1118383407592773,
+      "learning_rate": 8.277777777777778e-06,
+      "loss": 0.4567,
+      "step": 1275
+    },
+    {
+      "epoch": 0.830405621207282,
+      "grad_norm": 2.854698896408081,
+      "learning_rate": 8.222222222222222e-06,
+      "loss": 0.4729,
+      "step": 1300
+    },
+    {
+      "epoch": 0.8463749600766528,
+      "grad_norm": 3.1824917793273926,
+      "learning_rate": 8.166666666666668e-06,
+      "loss": 0.5073,
+      "step": 1325
+    },
+    {
+      "epoch": 0.8623442989460236,
+      "grad_norm": 3.554313898086548,
+      "learning_rate": 8.111111111111112e-06,
+      "loss": 0.4663,
+      "step": 1350
+    },
+    {
+      "epoch": 0.8783136378153944,
+      "grad_norm": 4.958135604858398,
+      "learning_rate": 8.055555555555557e-06,
+      "loss": 0.4614,
+      "step": 1375
+    },
+    {
+      "epoch": 0.8942829766847652,
+      "grad_norm": 3.5128700733184814,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 0.4522,
+      "step": 1400
+    },
+    {
+      "epoch": 0.9102523155541361,
+      "grad_norm": 4.640254020690918,
+      "learning_rate": 7.944444444444445e-06,
+      "loss": 0.4488,
+      "step": 1425
+    },
+    {
+      "epoch": 0.9262216544235069,
+      "grad_norm": 2.928645610809326,
+      "learning_rate": 7.88888888888889e-06,
+      "loss": 0.4352,
+      "step": 1450
+    },
+    {
+      "epoch": 0.9421909932928777,
+      "grad_norm": 3.42668080329895,
+      "learning_rate": 7.833333333333333e-06,
+      "loss": 0.4648,
+      "step": 1475
+    },
+    {
+      "epoch": 0.9581603321622485,
+      "grad_norm": 3.204360008239746,
+      "learning_rate": 7.77777777777778e-06,
+      "loss": 0.4318,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9741296710316193,
+      "grad_norm": 2.9323625564575195,
+      "learning_rate": 7.722222222222223e-06,
+      "loss": 0.4464,
+      "step": 1525
+    },
+    {
+      "epoch": 0.9900990099009901,
+      "grad_norm": 3.103804349899292,
+      "learning_rate": 7.666666666666667e-06,
+      "loss": 0.4599,
+      "step": 1550
+    },
+    {
+      "epoch": 1.006068348770361,
+      "grad_norm": 2.95595121383667,
+      "learning_rate": 7.611111111111111e-06,
+      "loss": 0.4017,
+      "step": 1575
+    },
+    {
+      "epoch": 1.0220376876397317,
+      "grad_norm": 2.5740716457366943,
+      "learning_rate": 7.555555555555556e-06,
+      "loss": 0.3208,
+      "step": 1600
+    },
+    {
+      "epoch": 1.0380070265091026,
+      "grad_norm": 3.569032907485962,
+      "learning_rate": 7.500000000000001e-06,
+      "loss": 0.3206,
+      "step": 1625
+    },
+    {
+      "epoch": 1.0539763653784733,
+      "grad_norm": 3.01520037651062,
+      "learning_rate": 7.444444444444445e-06,
+      "loss": 0.325,
+      "step": 1650
+    },
+    {
+      "epoch": 1.0699457042478442,
+      "grad_norm": 2.9796643257141113,
+      "learning_rate": 7.38888888888889e-06,
+      "loss": 0.3197,
+      "step": 1675
+    },
+    {
+      "epoch": 1.0859150431172149,
+      "grad_norm": 2.679849624633789,
+      "learning_rate": 7.333333333333333e-06,
+      "loss": 0.3305,
+      "step": 1700
+    },
+    {
+      "epoch": 1.1018843819865858,
+      "grad_norm": 2.760922908782959,
+      "learning_rate": 7.277777777777778e-06,
+      "loss": 0.3333,
+      "step": 1725
+    },
+    {
+      "epoch": 1.1178537208559565,
+      "grad_norm": 2.7702419757843018,
+      "learning_rate": 7.222222222222223e-06,
+      "loss": 0.3452,
+      "step": 1750
+    },
+    {
+      "epoch": 1.1338230597253274,
+      "grad_norm": 3.0105628967285156,
+      "learning_rate": 7.166666666666667e-06,
+      "loss": 0.3318,
+      "step": 1775
+    },
+    {
+      "epoch": 1.1497923985946983,
+      "grad_norm": 3.0848121643066406,
+      "learning_rate": 7.111111111111112e-06,
+      "loss": 0.314,
+      "step": 1800
+    },
+    {
+      "epoch": 1.165761737464069,
+      "grad_norm": 3.0407936573028564,
+      "learning_rate": 7.055555555555557e-06,
+      "loss": 0.339,
+      "step": 1825
+    },
+    {
+      "epoch": 1.1817310763334399,
+      "grad_norm": 2.692070722579956,
+      "learning_rate": 7e-06,
+      "loss": 0.3301,
+      "step": 1850
+    },
+    {
+      "epoch": 1.1977004152028106,
+      "grad_norm": 2.7906270027160645,
+      "learning_rate": 6.944444444444445e-06,
+      "loss": 0.3243,
+      "step": 1875
+    },
+    {
+      "epoch": 1.2136697540721815,
+      "grad_norm": 3.005059003829956,
+      "learning_rate": 6.88888888888889e-06,
+      "loss": 0.3348,
+      "step": 1900
+    },
+    {
+      "epoch": 1.2296390929415522,
+      "grad_norm": 2.512002944946289,
+      "learning_rate": 6.833333333333334e-06,
+      "loss": 0.3282,
+      "step": 1925
+    },
+    {
+      "epoch": 1.245608431810923,
+      "grad_norm": 2.2834126949310303,
+      "learning_rate": 6.777777777777779e-06,
+      "loss": 0.3184,
+      "step": 1950
+    },
+    {
+      "epoch": 1.2615777706802938,
+      "grad_norm": 2.6210882663726807,
+      "learning_rate": 6.7222222222222235e-06,
+      "loss": 0.3223,
+      "step": 1975
+    },
+    {
+      "epoch": 1.2775471095496647,
+      "grad_norm": 2.5650439262390137,
+      "learning_rate": 6.666666666666667e-06,
+      "loss": 0.3335,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2775471095496647,
+      "eval_loss": 0.41682446002960205,
+      "eval_runtime": 1686.343,
+      "eval_samples_per_second": 2.313,
+      "eval_steps_per_second": 0.145,
+      "eval_wer": 0.31297691490389257,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2935164484190356,
+      "grad_norm": 3.5779247283935547,
+      "learning_rate": 6.6111111111111115e-06,
+      "loss": 0.3204,
+      "step": 2025
+    },
+    {
+      "epoch": 1.3094857872884063,
+      "grad_norm": 2.3297035694122314,
+      "learning_rate": 6.555555555555556e-06,
+      "loss": 0.3237,
+      "step": 2050
+    },
+    {
+      "epoch": 1.325455126157777,
+      "grad_norm": 2.9575600624084473,
+      "learning_rate": 6.5000000000000004e-06,
+      "loss": 0.3333,
+      "step": 2075
+    },
+    {
+      "epoch": 1.3414244650271478,
+      "grad_norm": 2.6735305786132812,
+      "learning_rate": 6.444444444444445e-06,
+      "loss": 0.3199,
+      "step": 2100
+    },
+    {
+      "epoch": 1.3573938038965188,
+      "grad_norm": 3.5485923290252686,
+      "learning_rate": 6.3888888888888885e-06,
+      "loss": 0.3148,
+      "step": 2125
+    },
+    {
+      "epoch": 1.3733631427658894,
+      "grad_norm": 2.704557180404663,
+      "learning_rate": 6.333333333333333e-06,
+      "loss": 0.3268,
+      "step": 2150
+    },
+    {
+      "epoch": 1.3893324816352604,
+      "grad_norm": 2.417282819747925,
+      "learning_rate": 6.277777777777778e-06,
+      "loss": 0.3286,
+      "step": 2175
+    },
+    {
+      "epoch": 1.405301820504631,
+      "grad_norm": 2.5613949298858643,
+      "learning_rate": 6.222222222222223e-06,
+      "loss": 0.3182,
+      "step": 2200
+    },
+    {
+      "epoch": 1.421271159374002,
+      "grad_norm": 3.0761849880218506,
+      "learning_rate": 6.166666666666667e-06,
+      "loss": 0.329,
+      "step": 2225
+    },
+    {
+      "epoch": 1.4372404982433729,
+      "grad_norm": 2.764951229095459,
+      "learning_rate": 6.111111111111112e-06,
+      "loss": 0.3157,
+      "step": 2250
+    },
+    {
+      "epoch": 1.4532098371127435,
+      "grad_norm": 2.909294605255127,
+      "learning_rate": 6.055555555555555e-06,
+      "loss": 0.3291,
+      "step": 2275
+    },
+    {
+      "epoch": 1.4691791759821142,
+      "grad_norm": 3.374781370162964,
+      "learning_rate": 6e-06,
+      "loss": 0.3325,
+      "step": 2300
+    },
+    {
+      "epoch": 1.4851485148514851,
+      "grad_norm": 2.8524813652038574,
+      "learning_rate": 5.944444444444445e-06,
+      "loss": 0.3182,
+      "step": 2325
+    },
+    {
+      "epoch": 1.501117853720856,
+      "grad_norm": 3.066960573196411,
+      "learning_rate": 5.88888888888889e-06,
+      "loss": 0.3125,
+      "step": 2350
+    },
+    {
+      "epoch": 1.5170871925902267,
+      "grad_norm": 2.604304790496826,
+      "learning_rate": 5.833333333333334e-06,
+      "loss": 0.335,
+      "step": 2375
+    },
+    {
+      "epoch": 1.5330565314595974,
+      "grad_norm": 2.784276008605957,
+      "learning_rate": 5.777777777777778e-06,
+      "loss": 0.3318,
+      "step": 2400
+    },
+    {
+      "epoch": 1.5490258703289683,
+      "grad_norm": 3.203049898147583,
+      "learning_rate": 5.722222222222222e-06,
+      "loss": 0.3208,
+      "step": 2425
+    },
+    {
+      "epoch": 1.5649952091983392,
+      "grad_norm": 2.73185658454895,
+      "learning_rate": 5.666666666666667e-06,
+      "loss": 0.2981,
+      "step": 2450
+    },
+    {
+      "epoch": 1.5809645480677101,
+      "grad_norm": 2.6829750537872314,
+      "learning_rate": 5.611111111111112e-06,
+      "loss": 0.3191,
+      "step": 2475
+    },
+    {
+      "epoch": 1.5969338869370808,
+      "grad_norm": 3.1976850032806396,
+      "learning_rate": 5.555555555555557e-06,
+      "loss": 0.3335,
+      "step": 2500
+    },
+    {
+      "epoch": 1.6129032258064515,
+      "grad_norm": 3.003782033920288,
+      "learning_rate": 5.500000000000001e-06,
+      "loss": 0.3013,
+      "step": 2525
+    },
+    {
+      "epoch": 1.6288725646758224,
+      "grad_norm": 2.897247076034546,
+      "learning_rate": 5.444444444444445e-06,
+      "loss": 0.3123,
+      "step": 2550
+    },
+    {
+      "epoch": 1.6448419035451933,
+      "grad_norm": 2.996609687805176,
+      "learning_rate": 5.388888888888889e-06,
+      "loss": 0.3182,
+      "step": 2575
+    },
+    {
+      "epoch": 1.660811242414564,
+      "grad_norm": 2.700706720352173,
+      "learning_rate": 5.333333333333334e-06,
+      "loss": 0.312,
+      "step": 2600
+    },
+    {
+      "epoch": 1.6767805812839347,
+      "grad_norm": 2.901888370513916,
+      "learning_rate": 5.2777777777777785e-06,
+      "loss": 0.3088,
+      "step": 2625
+    },
+    {
+      "epoch": 1.6927499201533056,
+      "grad_norm": 2.958951711654663,
+      "learning_rate": 5.2222222222222226e-06,
+      "loss": 0.3283,
+      "step": 2650
+    },
+    {
+      "epoch": 1.7087192590226765,
+      "grad_norm": 3.610682964324951,
+      "learning_rate": 5.1666666666666675e-06,
+      "loss": 0.3332,
+      "step": 2675
+    },
+    {
+      "epoch": 1.7246885978920474,
+      "grad_norm": 2.292649745941162,
+      "learning_rate": 5.1111111111111115e-06,
+      "loss": 0.2918,
+      "step": 2700
+    },
+    {
+      "epoch": 1.7406579367614181,
+      "grad_norm": 2.41726016998291,
+      "learning_rate": 5.0555555555555555e-06,
+      "loss": 0.3215,
+      "step": 2725
+    },
+    {
+      "epoch": 1.7566272756307888,
+      "grad_norm": 2.4622721672058105,
+      "learning_rate": 5e-06,
+      "loss": 0.3025,
+      "step": 2750
+    },
+    {
+      "epoch": 1.7725966145001597,
+      "grad_norm": 2.802022695541382,
+      "learning_rate": 4.944444444444445e-06,
+      "loss": 0.3008,
+      "step": 2775
+    },
+    {
+      "epoch": 1.7885659533695306,
+      "grad_norm": 2.8401265144348145,
+      "learning_rate": 4.888888888888889e-06,
+      "loss": 0.3065,
+      "step": 2800
+    },
+    {
+      "epoch": 1.8045352922389013,
+      "grad_norm": 2.6890196800231934,
+      "learning_rate": 4.833333333333333e-06,
+      "loss": 0.3052,
+      "step": 2825
+    },
+    {
+      "epoch": 1.820504631108272,
+      "grad_norm": 3.257754325866699,
+      "learning_rate": 4.777777777777778e-06,
+      "loss": 0.3243,
+      "step": 2850
+    },
+    {
+      "epoch": 1.836473969977643,
+      "grad_norm": 2.595125675201416,
+      "learning_rate": 4.722222222222222e-06,
+      "loss": 0.316,
+      "step": 2875
+    },
+    {
+      "epoch": 1.8524433088470138,
+      "grad_norm": 2.713214159011841,
+      "learning_rate": 4.666666666666667e-06,
+      "loss": 0.3162,
+      "step": 2900
+    },
+    {
+      "epoch": 1.8684126477163845,
+      "grad_norm": 2.4735445976257324,
+      "learning_rate": 4.611111111111112e-06,
+      "loss": 0.2955,
+      "step": 2925
+    },
+    {
+      "epoch": 1.8843819865857554,
+      "grad_norm": 3.026921033859253,
+      "learning_rate": 4.557777777777778e-06,
+      "loss": 0.3058,
+      "step": 2950
+    },
+    {
+      "epoch": 1.900351325455126,
+      "grad_norm": 2.5658116340637207,
+      "learning_rate": 4.502222222222223e-06,
+      "loss": 0.3107,
+      "step": 2975
+    },
+    {
+      "epoch": 1.916320664324497,
+      "grad_norm": 2.6040449142456055,
+      "learning_rate": 4.446666666666667e-06,
+      "loss": 0.3281,
+      "step": 3000
+    },
+    {
+      "epoch": 1.916320664324497,
+      "eval_loss": 0.3877977430820465,
+      "eval_runtime": 1665.3573,
+      "eval_samples_per_second": 2.342,
+      "eval_steps_per_second": 0.147,
+      "eval_wer": 0.2965324060658746,
+      "step": 3000
+    },
+    {
+      "epoch": 1.932290003193868,
+      "grad_norm": 2.7531864643096924,
+      "learning_rate": 4.391111111111112e-06,
+      "loss": 0.3129,
+      "step": 3025
+    },
+    {
+      "epoch": 1.9482593420632386,
+      "grad_norm": 2.1554644107818604,
+      "learning_rate": 4.3355555555555565e-06,
+      "loss": 0.2996,
+      "step": 3050
+    },
+    {
+      "epoch": 1.9642286809326093,
+      "grad_norm": 2.7987213134765625,
+      "learning_rate": 4.2800000000000005e-06,
+      "loss": 0.3048,
+      "step": 3075
+    },
+    {
+      "epoch": 1.9801980198019802,
+      "grad_norm": 2.697014093399048,
+      "learning_rate": 4.2244444444444446e-06,
+      "loss": 0.3101,
+      "step": 3100
+    },
+    {
+      "epoch": 1.996167358671351,
+      "grad_norm": 2.2209935188293457,
+      "learning_rate": 4.168888888888889e-06,
+      "loss": 0.2935,
+      "step": 3125
+    },
+    {
+      "epoch": 2.012136697540722,
+      "grad_norm": 2.877319574356079,
+      "learning_rate": 4.1133333333333335e-06,
+      "loss": 0.2273,
+      "step": 3150
+    },
+    {
+      "epoch": 2.0281060364100925,
+      "grad_norm": 2.122941732406616,
+      "learning_rate": 4.057777777777778e-06,
+      "loss": 0.1945,
+      "step": 3175
+    },
+    {
+      "epoch": 2.0440753752794634,
+      "grad_norm": 2.2523446083068848,
+      "learning_rate": 4.002222222222222e-06,
+      "loss": 0.198,
+      "step": 3200
+    },
+    {
+      "epoch": 2.0600447141488343,
+      "grad_norm": 2.1842360496520996,
+      "learning_rate": 3.946666666666667e-06,
+      "loss": 0.1964,
+      "step": 3225
+    },
+    {
+      "epoch": 2.076014053018205,
+      "grad_norm": 2.5835633277893066,
+      "learning_rate": 3.891111111111111e-06,
+      "loss": 0.2093,
+      "step": 3250
+    },
+    {
+      "epoch": 2.0919833918875756,
+      "grad_norm": 2.534207344055176,
+      "learning_rate": 3.835555555555555e-06,
+      "loss": 0.193,
+      "step": 3275
+    },
+    {
+      "epoch": 2.1079527307569466,
+      "grad_norm": 3.2382678985595703,
+      "learning_rate": 3.7800000000000002e-06,
+      "loss": 0.2066,
+      "step": 3300
+    },
+    {
+      "epoch": 2.1239220696263175,
+      "grad_norm": 1.761076807975769,
+      "learning_rate": 3.724444444444445e-06,
+      "loss": 0.2032,
+      "step": 3325
+    },
+    {
+      "epoch": 2.1398914084956884,
+      "grad_norm": 2.6212522983551025,
+      "learning_rate": 3.668888888888889e-06,
+      "loss": 0.2018,
+      "step": 3350
+    },
+    {
+      "epoch": 2.1558607473650593,
+      "grad_norm": 2.240907669067383,
+      "learning_rate": 3.6133333333333336e-06,
+      "loss": 0.203,
+      "step": 3375
+    },
+    {
+      "epoch": 2.1718300862344297,
+      "grad_norm": 2.5074503421783447,
+      "learning_rate": 3.5577777777777785e-06,
+      "loss": 0.2029,
+      "step": 3400
+    },
+    {
+      "epoch": 2.1877994251038007,
+      "grad_norm": 2.3992538452148438,
+      "learning_rate": 3.5022222222222225e-06,
+      "loss": 0.1975,
+      "step": 3425
+    },
+    {
+      "epoch": 2.2037687639731716,
+      "grad_norm": 2.3788444995880127,
+      "learning_rate": 3.446666666666667e-06,
+      "loss": 0.2004,
+      "step": 3450
+    },
+    {
+      "epoch": 2.2197381028425425,
+      "grad_norm": 2.7057225704193115,
+      "learning_rate": 3.391111111111111e-06,
+      "loss": 0.1956,
+      "step": 3475
+    },
+    {
+      "epoch": 2.235707441711913,
+      "grad_norm": 2.399021863937378,
+      "learning_rate": 3.335555555555556e-06,
+      "loss": 0.2033,
+      "step": 3500
+    },
+    {
+      "epoch": 2.251676780581284,
+      "grad_norm": 2.3665523529052734,
+      "learning_rate": 3.2800000000000004e-06,
+      "loss": 0.203,
+      "step": 3525
+    },
+    {
+      "epoch": 2.2676461194506548,
+      "grad_norm": 2.4822895526885986,
+      "learning_rate": 3.2244444444444444e-06,
+      "loss": 0.1991,
+      "step": 3550
+    },
+    {
+      "epoch": 2.2836154583200257,
+      "grad_norm": 2.2787749767303467,
+      "learning_rate": 3.1688888888888893e-06,
+      "loss": 0.2024,
+      "step": 3575
+    },
+    {
+      "epoch": 2.2995847971893966,
+      "grad_norm": 2.3927159309387207,
+      "learning_rate": 3.1133333333333337e-06,
+      "loss": 0.1928,
+      "step": 3600
+    },
+    {
+      "epoch": 2.315554136058767,
+      "grad_norm": 2.311298370361328,
+      "learning_rate": 3.0577777777777778e-06,
+      "loss": 0.2231,
+      "step": 3625
+    },
+    {
+      "epoch": 2.331523474928138,
+      "grad_norm": 2.4807193279266357,
+      "learning_rate": 3.0022222222222227e-06,
+      "loss": 0.1938,
+      "step": 3650
+    },
+    {
+      "epoch": 2.347492813797509,
+      "grad_norm": 2.698840856552124,
+      "learning_rate": 2.946666666666667e-06,
+      "loss": 0.1954,
+      "step": 3675
+    },
+    {
+      "epoch": 2.3634621526668798,
+      "grad_norm": 2.193194627761841,
+      "learning_rate": 2.891111111111111e-06,
+      "loss": 0.2013,
+      "step": 3700
+    },
+    {
+      "epoch": 2.37943149153625,
+      "grad_norm": 2.5082945823669434,
+      "learning_rate": 2.835555555555556e-06,
+      "loss": 0.1963,
+      "step": 3725
+    },
+    {
+      "epoch": 2.395400830405621,
+      "grad_norm": 2.7638180255889893,
+      "learning_rate": 2.7800000000000005e-06,
+      "loss": 0.1919,
+      "step": 3750
+    },
+    {
+      "epoch": 2.411370169274992,
+      "grad_norm": 2.6804022789001465,
+      "learning_rate": 2.7244444444444445e-06,
+      "loss": 0.2008,
+      "step": 3775
+    },
+    {
+      "epoch": 2.427339508144363,
+      "grad_norm": 2.131471872329712,
+      "learning_rate": 2.6688888888888894e-06,
+      "loss": 0.1912,
+      "step": 3800
+    },
+    {
+      "epoch": 2.443308847013734,
+      "grad_norm": 2.964355707168579,
+      "learning_rate": 2.6133333333333334e-06,
+      "loss": 0.1943,
+      "step": 3825
+    },
+    {
+      "epoch": 2.4592781858831043,
+      "grad_norm": 2.4776132106781006,
+      "learning_rate": 2.557777777777778e-06,
+      "loss": 0.1916,
+      "step": 3850
+    },
+    {
+      "epoch": 2.4752475247524752,
+      "grad_norm": 2.556891441345215,
+      "learning_rate": 2.5022222222222224e-06,
+      "loss": 0.2041,
+      "step": 3875
+    },
+    {
+      "epoch": 2.491216863621846,
+      "grad_norm": 2.0888752937316895,
+      "learning_rate": 2.446666666666667e-06,
+      "loss": 0.2026,
+      "step": 3900
+    },
+    {
+      "epoch": 2.5071862024912166,
+      "grad_norm": 2.438257932662964,
+      "learning_rate": 2.3911111111111113e-06,
+      "loss": 0.2017,
+      "step": 3925
+    },
+    {
+      "epoch": 2.5231555413605875,
+      "grad_norm": 2.6804213523864746,
+      "learning_rate": 2.3355555555555557e-06,
+      "loss": 0.1957,
+      "step": 3950
+    },
+    {
+      "epoch": 2.5391248802299584,
+      "grad_norm": 2.409219264984131,
+      "learning_rate": 2.28e-06,
+      "loss": 0.201,
+      "step": 3975
+    },
+    {
+      "epoch": 2.5550942190993293,
+      "grad_norm": 2.4475369453430176,
+      "learning_rate": 2.2244444444444447e-06,
+      "loss": 0.1901,
+      "step": 4000
+    },
+    {
+      "epoch": 2.5550942190993293,
+      "eval_loss": 0.392954021692276,
+      "eval_runtime": 1661.0812,
+      "eval_samples_per_second": 2.348,
+      "eval_steps_per_second": 0.147,
+      "eval_wer": 0.2848932676518883,
+      "step": 4000
+    },
+    {
+      "epoch": 2.5710635579687002,
+      "grad_norm": 2.4416608810424805,
+      "learning_rate": 2.168888888888889e-06,
+      "loss": 0.1886,
+      "step": 4025
+    },
+    {
+      "epoch": 2.587032896838071,
+      "grad_norm": 2.4266738891601562,
+      "learning_rate": 2.1133333333333336e-06,
+      "loss": 0.1964,
+      "step": 4050
+    },
+    {
+      "epoch": 2.6030022357074416,
+      "grad_norm": 2.2254934310913086,
+      "learning_rate": 2.057777777777778e-06,
+      "loss": 0.193,
+      "step": 4075
+    },
+    {
+      "epoch": 2.6189715745768125,
+      "grad_norm": 2.269014596939087,
+      "learning_rate": 2.0022222222222225e-06,
+      "loss": 0.1911,
+      "step": 4100
+    },
+    {
+      "epoch": 2.6349409134461834,
+      "grad_norm": 2.8170831203460693,
+      "learning_rate": 1.9466666666666665e-06,
+      "loss": 0.1891,
+      "step": 4125
+    },
+    {
+      "epoch": 2.650910252315554,
+      "grad_norm": 2.5246517658233643,
+      "learning_rate": 1.8911111111111114e-06,
+      "loss": 0.2021,
+      "step": 4150
+    },
+    {
+      "epoch": 2.666879591184925,
+      "grad_norm": 2.7256410121917725,
+      "learning_rate": 1.8355555555555557e-06,
+      "loss": 0.1924,
+      "step": 4175
+    },
+    {
+      "epoch": 2.6828489300542957,
+      "grad_norm": 2.446582555770874,
+      "learning_rate": 1.7800000000000001e-06,
+      "loss": 0.1934,
+      "step": 4200
+    },
+    {
+      "epoch": 2.6988182689236666,
+      "grad_norm": 2.602579116821289,
+      "learning_rate": 1.7244444444444448e-06,
+      "loss": 0.188,
+      "step": 4225
+    },
+    {
+      "epoch": 2.7147876077930375,
+      "grad_norm": 2.2500104904174805,
+      "learning_rate": 1.668888888888889e-06,
+      "loss": 0.1951,
+      "step": 4250
+    },
+    {
+      "epoch": 2.7307569466624084,
+      "grad_norm": 2.7052624225616455,
+      "learning_rate": 1.6133333333333335e-06,
+      "loss": 0.2039,
+      "step": 4275
+    },
+    {
+      "epoch": 2.746726285531779,
+      "grad_norm": 2.827786922454834,
+      "learning_rate": 1.5577777777777777e-06,
+      "loss": 0.1859,
+      "step": 4300
+    },
+    {
+      "epoch": 2.76269562440115,
+      "grad_norm": 2.8331093788146973,
+      "learning_rate": 1.5022222222222224e-06,
+      "loss": 0.2028,
+      "step": 4325
+    },
+    {
+      "epoch": 2.7786649632705207,
+      "grad_norm": 2.4881205558776855,
+      "learning_rate": 1.4466666666666669e-06,
+      "loss": 0.2013,
+      "step": 4350
+    },
+    {
+      "epoch": 2.794634302139891,
+      "grad_norm": 2.725170612335205,
+      "learning_rate": 1.3911111111111111e-06,
+      "loss": 0.202,
+      "step": 4375
+    },
+    {
+      "epoch": 2.810603641009262,
+      "grad_norm": 2.9598212242126465,
+      "learning_rate": 1.3355555555555558e-06,
+      "loss": 0.1957,
+      "step": 4400
+    },
+    {
+      "epoch": 2.826572979878633,
+      "grad_norm": 2.1554248332977295,
+      "learning_rate": 1.28e-06,
+      "loss": 0.185,
+      "step": 4425
+    },
+    {
+      "epoch": 2.842542318748004,
+      "grad_norm": 2.447131872177124,
+      "learning_rate": 1.2244444444444445e-06,
+      "loss": 0.197,
+      "step": 4450
+    },
+    {
+      "epoch": 2.858511657617375,
+      "grad_norm": 2.1197404861450195,
+      "learning_rate": 1.168888888888889e-06,
+      "loss": 0.189,
+      "step": 4475
+    },
+    {
+      "epoch": 2.8744809964867457,
+      "grad_norm": 2.2641892433166504,
+      "learning_rate": 1.1133333333333334e-06,
+      "loss": 0.1825,
+      "step": 4500
+    },
+    {
+      "epoch": 2.890450335356116,
+      "grad_norm": 2.380861759185791,
+      "learning_rate": 1.0577777777777779e-06,
+      "loss": 0.186,
+      "step": 4525
+    },
+    {
+      "epoch": 2.906419674225487,
+      "grad_norm": 2.575404167175293,
+      "learning_rate": 1.0022222222222223e-06,
+      "loss": 0.194,
+      "step": 4550
+    },
+    {
+      "epoch": 2.922389013094858,
+      "grad_norm": 1.9462391138076782,
+      "learning_rate": 9.466666666666667e-07,
+      "loss": 0.2058,
+      "step": 4575
+    },
+    {
+      "epoch": 2.9383583519642285,
+      "grad_norm": 2.332577705383301,
+      "learning_rate": 8.911111111111112e-07,
+      "loss": 0.1864,
+      "step": 4600
+    },
+    {
+      "epoch": 2.9543276908335994,
+      "grad_norm": 2.345797300338745,
+      "learning_rate": 8.355555555555556e-07,
+      "loss": 0.1955,
+      "step": 4625
+    },
+    {
+      "epoch": 2.9702970297029703,
+      "grad_norm": 2.41668438911438,
+      "learning_rate": 7.8e-07,
+      "loss": 0.1804,
+      "step": 4650
+    },
+    {
+      "epoch": 2.986266368572341,
+      "grad_norm": 2.3288943767547607,
+      "learning_rate": 7.244444444444446e-07,
+      "loss": 0.1911,
+      "step": 4675
+    },
+    {
+      "epoch": 3.002235707441712,
+      "grad_norm": 1.3726705312728882,
+      "learning_rate": 6.68888888888889e-07,
+      "loss": 0.1877,
+      "step": 4700
+    },
+    {
+      "epoch": 3.0182050463110826,
+      "grad_norm": 2.3931479454040527,
+      "learning_rate": 6.133333333333333e-07,
+      "loss": 0.1261,
+      "step": 4725
+    },
+    {
+      "epoch": 3.0341743851804535,
+      "grad_norm": 1.6263169050216675,
+      "learning_rate": 5.577777777777779e-07,
+      "loss": 0.1247,
+      "step": 4750
+    },
+    {
+      "epoch": 3.0501437240498244,
+      "grad_norm": 1.9789073467254639,
+      "learning_rate": 5.022222222222222e-07,
+      "loss": 0.1259,
+      "step": 4775
+    },
+    {
+      "epoch": 3.0661130629191953,
+      "grad_norm": 2.2936391830444336,
+      "learning_rate": 4.466666666666667e-07,
+      "loss": 0.1318,
+      "step": 4800
+    },
+    {
+      "epoch": 3.0820824017885657,
+      "grad_norm": 2.009859800338745,
+      "learning_rate": 3.9111111111111115e-07,
+      "loss": 0.1346,
+      "step": 4825
+    },
+    {
+      "epoch": 3.0980517406579366,
+      "grad_norm": 2.502535104751587,
+      "learning_rate": 3.3555555555555556e-07,
+      "loss": 0.1312,
+      "step": 4850
+    },
+    {
+      "epoch": 3.1140210795273076,
+      "grad_norm": 2.6293087005615234,
+      "learning_rate": 2.8e-07,
+      "loss": 0.1277,
+      "step": 4875
+    },
+    {
+      "epoch": 3.1299904183966785,
+      "grad_norm": 2.0969161987304688,
+      "learning_rate": 2.2444444444444445e-07,
+      "loss": 0.1301,
+      "step": 4900
+    },
+    {
+      "epoch": 3.1459597572660494,
+      "grad_norm": 1.9938933849334717,
+      "learning_rate": 1.6888888888888888e-07,
+      "loss": 0.129,
+      "step": 4925
+    },
+    {
+      "epoch": 3.16192909613542,
+      "grad_norm": 2.1169307231903076,
+      "learning_rate": 1.1333333333333336e-07,
+      "loss": 0.134,
+      "step": 4950
+    },
+    {
+      "epoch": 3.1778984350047907,
+      "grad_norm": 2.5867435932159424,
+      "learning_rate": 5.777777777777778e-08,
+      "loss": 0.1379,
+      "step": 4975
+    },
+    {
+      "epoch": 3.1938677738741617,
+      "grad_norm": 2.1530654430389404,
+      "learning_rate": 2.2222222222222225e-09,
+      "loss": 0.1306,
+      "step": 5000
+    },
+    {
+      "epoch": 3.1938677738741617,
+      "eval_loss": 0.40369176864624023,
+      "eval_runtime": 1644.4554,
+      "eval_samples_per_second": 2.372,
+      "eval_steps_per_second": 0.148,
+      "eval_wer": 0.27909784603496574,
+      "step": 5000
+    },
+    {
+      "epoch": 3.1938677738741617,
+      "step": 5000,
+      "total_flos": 5.435895365546803e+20,
+      "train_loss": 0.3537434986591339,
+      "train_runtime": 57744.1557,
+      "train_samples_per_second": 2.771,
+      "train_steps_per_second": 0.087
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 5000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.435895365546803e+20,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}