End of training

Browse files

Files changed (6) hide show

README.md +1 -1
all_results.json +15 -0
eval_results.json +9 -0
runs/Nov07_16-48-55_8cf4d7876f53/events.out.tfevents.1731058123.8cf4d7876f53.36.1 +3 -0
train_results.json +9 -0
trainer_state.json +1487 -0

README.md CHANGED Viewed

@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # whisper-large-v3-turbo-ft-btb-cv-cy
-This model is a fine-tuned version of [openai/whisper-large-v3-turbo](https://huggingface.co/openai/whisper-large-v3-turbo) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.4072
 - Wer: 0.2801

 # whisper-large-v3-turbo-ft-btb-cv-cy
+This model is a fine-tuned version of [openai/whisper-large-v3-turbo](https://huggingface.co/openai/whisper-large-v3-turbo) on the DewiBrynJones/banc-trawsgrifiadau-bangor-clean train main, DewiBrynJones/commonvoice_18_0_cy train+dev main dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.4072
 - Wer: 0.2801

all_results.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+    "epoch": 3.1595576619273302,
+    "eval_loss": 0.4071974754333496,
+    "eval_runtime": 1291.4438,
+    "eval_samples": 3901,
+    "eval_samples_per_second": 3.021,
+    "eval_steps_per_second": 0.189,
+    "eval_wer": 0.2801361924079977,
+    "total_flos": 2.727921844224e+20,
+    "train_loss": 0.3573408980369568,
+    "train_runtime": 57234.8416,
+    "train_samples": 50640,
+    "train_samples_per_second": 2.796,
+    "train_steps_per_second": 0.087
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.1595576619273302,
+    "eval_loss": 0.4071974754333496,
+    "eval_runtime": 1291.4438,
+    "eval_samples": 3901,
+    "eval_samples_per_second": 3.021,
+    "eval_steps_per_second": 0.189,
+    "eval_wer": 0.2801361924079977
+}

runs/Nov07_16-48-55_8cf4d7876f53/events.out.tfevents.1731058123.8cf4d7876f53.36.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4eae7d20a5b201bc954bff1e5da205536fc0d3bf2bf76aaed17506f42caf1424
+size 406

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.1595576619273302,
+    "total_flos": 2.727921844224e+20,
+    "train_loss": 0.3573408980369568,
+    "train_runtime": 57234.8416,
+    "train_samples": 50640,
+    "train_samples_per_second": 2.796,
+    "train_steps_per_second": 0.087
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1487 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.1595576619273302,
+  "eval_steps": 1000,
+  "global_step": 5000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.01579778830963665,
+      "grad_norm": 39.78215789794922,
+      "learning_rate": 4.6000000000000004e-07,
+      "loss": 1.9397,
+      "step": 25
+    },
+    {
+      "epoch": 0.0315955766192733,
+      "grad_norm": 27.938426971435547,
+      "learning_rate": 9.600000000000001e-07,
+      "loss": 1.2937,
+      "step": 50
+    },
+    {
+      "epoch": 0.04739336492890995,
+      "grad_norm": 25.550430297851562,
+      "learning_rate": 1.46e-06,
+      "loss": 0.9786,
+      "step": 75
+    },
+    {
+      "epoch": 0.0631911532385466,
+      "grad_norm": 26.635549545288086,
+      "learning_rate": 1.9600000000000003e-06,
+      "loss": 0.8818,
+      "step": 100
+    },
+    {
+      "epoch": 0.07898894154818326,
+      "grad_norm": 25.53322410583496,
+      "learning_rate": 2.46e-06,
+      "loss": 0.8233,
+      "step": 125
+    },
+    {
+      "epoch": 0.0947867298578199,
+      "grad_norm": 26.468778610229492,
+      "learning_rate": 2.96e-06,
+      "loss": 0.7945,
+      "step": 150
+    },
+    {
+      "epoch": 0.11058451816745656,
+      "grad_norm": 22.71087074279785,
+      "learning_rate": 3.46e-06,
+      "loss": 0.752,
+      "step": 175
+    },
+    {
+      "epoch": 0.1263823064770932,
+      "grad_norm": 21.494964599609375,
+      "learning_rate": 3.96e-06,
+      "loss": 0.7655,
+      "step": 200
+    },
+    {
+      "epoch": 0.14218009478672985,
+      "grad_norm": 22.558645248413086,
+      "learning_rate": 4.4600000000000005e-06,
+      "loss": 0.72,
+      "step": 225
+    },
+    {
+      "epoch": 0.1579778830963665,
+      "grad_norm": 22.013551712036133,
+      "learning_rate": 4.960000000000001e-06,
+      "loss": 0.6961,
+      "step": 250
+    },
+    {
+      "epoch": 0.17377567140600317,
+      "grad_norm": 25.887876510620117,
+      "learning_rate": 5.460000000000001e-06,
+      "loss": 0.6897,
+      "step": 275
+    },
+    {
+      "epoch": 0.1895734597156398,
+      "grad_norm": 19.806230545043945,
+      "learning_rate": 5.9600000000000005e-06,
+      "loss": 0.6839,
+      "step": 300
+    },
+    {
+      "epoch": 0.20537124802527645,
+      "grad_norm": 21.767576217651367,
+      "learning_rate": 6.460000000000001e-06,
+      "loss": 0.6767,
+      "step": 325
+    },
+    {
+      "epoch": 0.2211690363349131,
+      "grad_norm": 19.838890075683594,
+      "learning_rate": 6.96e-06,
+      "loss": 0.6637,
+      "step": 350
+    },
+    {
+      "epoch": 0.23696682464454977,
+      "grad_norm": 22.119140625,
+      "learning_rate": 7.4600000000000006e-06,
+      "loss": 0.6811,
+      "step": 375
+    },
+    {
+      "epoch": 0.2527646129541864,
+      "grad_norm": 21.972688674926758,
+      "learning_rate": 7.960000000000002e-06,
+      "loss": 0.6843,
+      "step": 400
+    },
+    {
+      "epoch": 0.2685624012638231,
+      "grad_norm": 21.99839973449707,
+      "learning_rate": 8.46e-06,
+      "loss": 0.6487,
+      "step": 425
+    },
+    {
+      "epoch": 0.2843601895734597,
+      "grad_norm": 18.968303680419922,
+      "learning_rate": 8.96e-06,
+      "loss": 0.6528,
+      "step": 450
+    },
+    {
+      "epoch": 0.3001579778830964,
+      "grad_norm": 20.77776336669922,
+      "learning_rate": 9.460000000000001e-06,
+      "loss": 0.6346,
+      "step": 475
+    },
+    {
+      "epoch": 0.315955766192733,
+      "grad_norm": 17.947195053100586,
+      "learning_rate": 9.960000000000001e-06,
+      "loss": 0.6105,
+      "step": 500
+    },
+    {
+      "epoch": 0.33175355450236965,
+      "grad_norm": 17.2167911529541,
+      "learning_rate": 9.94888888888889e-06,
+      "loss": 0.6399,
+      "step": 525
+    },
+    {
+      "epoch": 0.34755134281200634,
+      "grad_norm": 19.390045166015625,
+      "learning_rate": 9.893333333333334e-06,
+      "loss": 0.5825,
+      "step": 550
+    },
+    {
+      "epoch": 0.36334913112164297,
+      "grad_norm": 17.195106506347656,
+      "learning_rate": 9.837777777777778e-06,
+      "loss": 0.5895,
+      "step": 575
+    },
+    {
+      "epoch": 0.3791469194312796,
+      "grad_norm": 18.29102325439453,
+      "learning_rate": 9.782222222222222e-06,
+      "loss": 0.621,
+      "step": 600
+    },
+    {
+      "epoch": 0.3949447077409163,
+      "grad_norm": 16.213546752929688,
+      "learning_rate": 9.726666666666668e-06,
+      "loss": 0.587,
+      "step": 625
+    },
+    {
+      "epoch": 0.4107424960505529,
+      "grad_norm": 15.86738109588623,
+      "learning_rate": 9.671111111111112e-06,
+      "loss": 0.5765,
+      "step": 650
+    },
+    {
+      "epoch": 0.4265402843601896,
+      "grad_norm": 17.446897506713867,
+      "learning_rate": 9.617777777777778e-06,
+      "loss": 0.5914,
+      "step": 675
+    },
+    {
+      "epoch": 0.4423380726698262,
+      "grad_norm": 15.107172966003418,
+      "learning_rate": 9.562222222222223e-06,
+      "loss": 0.5517,
+      "step": 700
+    },
+    {
+      "epoch": 0.45813586097946285,
+      "grad_norm": 16.07261848449707,
+      "learning_rate": 9.506666666666667e-06,
+      "loss": 0.5523,
+      "step": 725
+    },
+    {
+      "epoch": 0.47393364928909953,
+      "grad_norm": 15.2976655960083,
+      "learning_rate": 9.451111111111112e-06,
+      "loss": 0.5419,
+      "step": 750
+    },
+    {
+      "epoch": 0.48973143759873616,
+      "grad_norm": 16.560869216918945,
+      "learning_rate": 9.395555555555556e-06,
+      "loss": 0.5763,
+      "step": 775
+    },
+    {
+      "epoch": 0.5055292259083728,
+      "grad_norm": 18.58966064453125,
+      "learning_rate": 9.340000000000002e-06,
+      "loss": 0.5497,
+      "step": 800
+    },
+    {
+      "epoch": 0.5213270142180095,
+      "grad_norm": 16.940420150756836,
+      "learning_rate": 9.284444444444444e-06,
+      "loss": 0.5177,
+      "step": 825
+    },
+    {
+      "epoch": 0.5371248025276462,
+      "grad_norm": 12.964641571044922,
+      "learning_rate": 9.22888888888889e-06,
+      "loss": 0.5293,
+      "step": 850
+    },
+    {
+      "epoch": 0.5529225908372828,
+      "grad_norm": 16.428470611572266,
+      "learning_rate": 9.173333333333334e-06,
+      "loss": 0.5062,
+      "step": 875
+    },
+    {
+      "epoch": 0.5687203791469194,
+      "grad_norm": 17.921024322509766,
+      "learning_rate": 9.117777777777778e-06,
+      "loss": 0.5471,
+      "step": 900
+    },
+    {
+      "epoch": 0.584518167456556,
+      "grad_norm": 14.068251609802246,
+      "learning_rate": 9.062222222222224e-06,
+      "loss": 0.5072,
+      "step": 925
+    },
+    {
+      "epoch": 0.6003159557661928,
+      "grad_norm": 13.403594017028809,
+      "learning_rate": 9.006666666666666e-06,
+      "loss": 0.5051,
+      "step": 950
+    },
+    {
+      "epoch": 0.6161137440758294,
+      "grad_norm": 14.557646751403809,
+      "learning_rate": 8.951111111111112e-06,
+      "loss": 0.5391,
+      "step": 975
+    },
+    {
+      "epoch": 0.631911532385466,
+      "grad_norm": 15.385436058044434,
+      "learning_rate": 8.895555555555556e-06,
+      "loss": 0.5037,
+      "step": 1000
+    },
+    {
+      "epoch": 0.631911532385466,
+      "eval_loss": 0.551193118095398,
+      "eval_runtime": 1363.4745,
+      "eval_samples_per_second": 2.861,
+      "eval_steps_per_second": 0.179,
+      "eval_wer": 0.38095238095238093,
+      "step": 1000
+    },
+    {
+      "epoch": 0.6477093206951027,
+      "grad_norm": 13.815781593322754,
+      "learning_rate": 8.84e-06,
+      "loss": 0.4888,
+      "step": 1025
+    },
+    {
+      "epoch": 0.6635071090047393,
+      "grad_norm": 16.213642120361328,
+      "learning_rate": 8.784444444444446e-06,
+      "loss": 0.4865,
+      "step": 1050
+    },
+    {
+      "epoch": 0.6793048973143759,
+      "grad_norm": 14.779074668884277,
+      "learning_rate": 8.72888888888889e-06,
+      "loss": 0.4963,
+      "step": 1075
+    },
+    {
+      "epoch": 0.6951026856240127,
+      "grad_norm": 13.794785499572754,
+      "learning_rate": 8.673333333333334e-06,
+      "loss": 0.482,
+      "step": 1100
+    },
+    {
+      "epoch": 0.7109004739336493,
+      "grad_norm": 14.701066970825195,
+      "learning_rate": 8.617777777777778e-06,
+      "loss": 0.5027,
+      "step": 1125
+    },
+    {
+      "epoch": 0.7266982622432859,
+      "grad_norm": 14.537113189697266,
+      "learning_rate": 8.562222222222224e-06,
+      "loss": 0.4798,
+      "step": 1150
+    },
+    {
+      "epoch": 0.7424960505529226,
+      "grad_norm": 14.886212348937988,
+      "learning_rate": 8.506666666666668e-06,
+      "loss": 0.4777,
+      "step": 1175
+    },
+    {
+      "epoch": 0.7582938388625592,
+      "grad_norm": 13.01016616821289,
+      "learning_rate": 8.451111111111112e-06,
+      "loss": 0.4736,
+      "step": 1200
+    },
+    {
+      "epoch": 0.7740916271721959,
+      "grad_norm": 14.213628768920898,
+      "learning_rate": 8.395555555555557e-06,
+      "loss": 0.4808,
+      "step": 1225
+    },
+    {
+      "epoch": 0.7898894154818326,
+      "grad_norm": 14.541191101074219,
+      "learning_rate": 8.34e-06,
+      "loss": 0.5008,
+      "step": 1250
+    },
+    {
+      "epoch": 0.8056872037914692,
+      "grad_norm": 14.092310905456543,
+      "learning_rate": 8.284444444444446e-06,
+      "loss": 0.4693,
+      "step": 1275
+    },
+    {
+      "epoch": 0.8214849921011058,
+      "grad_norm": 14.289324760437012,
+      "learning_rate": 8.22888888888889e-06,
+      "loss": 0.4418,
+      "step": 1300
+    },
+    {
+      "epoch": 0.8372827804107424,
+      "grad_norm": 13.257657051086426,
+      "learning_rate": 8.173333333333334e-06,
+      "loss": 0.4643,
+      "step": 1325
+    },
+    {
+      "epoch": 0.8530805687203792,
+      "grad_norm": 15.05517864227295,
+      "learning_rate": 8.11777777777778e-06,
+      "loss": 0.4751,
+      "step": 1350
+    },
+    {
+      "epoch": 0.8688783570300158,
+      "grad_norm": 13.352594375610352,
+      "learning_rate": 8.062222222222222e-06,
+      "loss": 0.442,
+      "step": 1375
+    },
+    {
+      "epoch": 0.8846761453396524,
+      "grad_norm": 12.487988471984863,
+      "learning_rate": 8.006666666666667e-06,
+      "loss": 0.4496,
+      "step": 1400
+    },
+    {
+      "epoch": 0.9004739336492891,
+      "grad_norm": 13.963912963867188,
+      "learning_rate": 7.951111111111111e-06,
+      "loss": 0.4415,
+      "step": 1425
+    },
+    {
+      "epoch": 0.9162717219589257,
+      "grad_norm": 13.90829086303711,
+      "learning_rate": 7.895555555555557e-06,
+      "loss": 0.4349,
+      "step": 1450
+    },
+    {
+      "epoch": 0.9320695102685624,
+      "grad_norm": 16.863481521606445,
+      "learning_rate": 7.840000000000001e-06,
+      "loss": 0.4722,
+      "step": 1475
+    },
+    {
+      "epoch": 0.9478672985781991,
+      "grad_norm": 13.991209983825684,
+      "learning_rate": 7.784444444444445e-06,
+      "loss": 0.4397,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9636650868878357,
+      "grad_norm": 12.423737525939941,
+      "learning_rate": 7.72888888888889e-06,
+      "loss": 0.423,
+      "step": 1525
+    },
+    {
+      "epoch": 0.9794628751974723,
+      "grad_norm": 13.574849128723145,
+      "learning_rate": 7.673333333333333e-06,
+      "loss": 0.4472,
+      "step": 1550
+    },
+    {
+      "epoch": 0.995260663507109,
+      "grad_norm": 10.542879104614258,
+      "learning_rate": 7.617777777777778e-06,
+      "loss": 0.4319,
+      "step": 1575
+    },
+    {
+      "epoch": 1.0110584518167456,
+      "grad_norm": 12.750446319580078,
+      "learning_rate": 7.562222222222223e-06,
+      "loss": 0.3401,
+      "step": 1600
+    },
+    {
+      "epoch": 1.0268562401263823,
+      "grad_norm": 10.037585258483887,
+      "learning_rate": 7.506666666666668e-06,
+      "loss": 0.3236,
+      "step": 1625
+    },
+    {
+      "epoch": 1.042654028436019,
+      "grad_norm": 12.519506454467773,
+      "learning_rate": 7.451111111111111e-06,
+      "loss": 0.3379,
+      "step": 1650
+    },
+    {
+      "epoch": 1.0584518167456556,
+      "grad_norm": 11.666909217834473,
+      "learning_rate": 7.395555555555556e-06,
+      "loss": 0.3082,
+      "step": 1675
+    },
+    {
+      "epoch": 1.0742496050552923,
+      "grad_norm": 14.614953994750977,
+      "learning_rate": 7.340000000000001e-06,
+      "loss": 0.3272,
+      "step": 1700
+    },
+    {
+      "epoch": 1.0900473933649288,
+      "grad_norm": 11.243821144104004,
+      "learning_rate": 7.284444444444445e-06,
+      "loss": 0.3081,
+      "step": 1725
+    },
+    {
+      "epoch": 1.1058451816745656,
+      "grad_norm": 10.896337509155273,
+      "learning_rate": 7.22888888888889e-06,
+      "loss": 0.3212,
+      "step": 1750
+    },
+    {
+      "epoch": 1.1216429699842023,
+      "grad_norm": 11.159407615661621,
+      "learning_rate": 7.173333333333335e-06,
+      "loss": 0.3316,
+      "step": 1775
+    },
+    {
+      "epoch": 1.1374407582938388,
+      "grad_norm": 10.72888469696045,
+      "learning_rate": 7.117777777777778e-06,
+      "loss": 0.3465,
+      "step": 1800
+    },
+    {
+      "epoch": 1.1532385466034756,
+      "grad_norm": 11.136720657348633,
+      "learning_rate": 7.062222222222223e-06,
+      "loss": 0.3219,
+      "step": 1825
+    },
+    {
+      "epoch": 1.169036334913112,
+      "grad_norm": 11.062825202941895,
+      "learning_rate": 7.006666666666667e-06,
+      "loss": 0.3234,
+      "step": 1850
+    },
+    {
+      "epoch": 1.1848341232227488,
+      "grad_norm": 13.367472648620605,
+      "learning_rate": 6.951111111111112e-06,
+      "loss": 0.3215,
+      "step": 1875
+    },
+    {
+      "epoch": 1.2006319115323856,
+      "grad_norm": 12.034098625183105,
+      "learning_rate": 6.8955555555555565e-06,
+      "loss": 0.3195,
+      "step": 1900
+    },
+    {
+      "epoch": 1.216429699842022,
+      "grad_norm": 12.74405574798584,
+      "learning_rate": 6.8400000000000014e-06,
+      "loss": 0.3105,
+      "step": 1925
+    },
+    {
+      "epoch": 1.2322274881516588,
+      "grad_norm": 14.234502792358398,
+      "learning_rate": 6.784444444444445e-06,
+      "loss": 0.3327,
+      "step": 1950
+    },
+    {
+      "epoch": 1.2480252764612954,
+      "grad_norm": 12.721147537231445,
+      "learning_rate": 6.7288888888888895e-06,
+      "loss": 0.3405,
+      "step": 1975
+    },
+    {
+      "epoch": 1.263823064770932,
+      "grad_norm": 12.109272003173828,
+      "learning_rate": 6.6733333333333335e-06,
+      "loss": 0.3264,
+      "step": 2000
+    },
+    {
+      "epoch": 1.263823064770932,
+      "eval_loss": 0.4630681872367859,
+      "eval_runtime": 1327.3738,
+      "eval_samples_per_second": 2.939,
+      "eval_steps_per_second": 0.184,
+      "eval_wer": 0.331425673717763,
+      "step": 2000
+    },
+    {
+      "epoch": 1.2796208530805688,
+      "grad_norm": 10.781163215637207,
+      "learning_rate": 6.617777777777778e-06,
+      "loss": 0.3152,
+      "step": 2025
+    },
+    {
+      "epoch": 1.2954186413902053,
+      "grad_norm": 12.081149101257324,
+      "learning_rate": 6.562222222222223e-06,
+      "loss": 0.334,
+      "step": 2050
+    },
+    {
+      "epoch": 1.311216429699842,
+      "grad_norm": 11.082480430603027,
+      "learning_rate": 6.5066666666666665e-06,
+      "loss": 0.3182,
+      "step": 2075
+    },
+    {
+      "epoch": 1.3270142180094786,
+      "grad_norm": 10.400703430175781,
+      "learning_rate": 6.451111111111111e-06,
+      "loss": 0.3209,
+      "step": 2100
+    },
+    {
+      "epoch": 1.3428120063191153,
+      "grad_norm": 14.698405265808105,
+      "learning_rate": 6.395555555555556e-06,
+      "loss": 0.3358,
+      "step": 2125
+    },
+    {
+      "epoch": 1.358609794628752,
+      "grad_norm": 11.301855087280273,
+      "learning_rate": 6.34e-06,
+      "loss": 0.3274,
+      "step": 2150
+    },
+    {
+      "epoch": 1.3744075829383886,
+      "grad_norm": 13.15268611907959,
+      "learning_rate": 6.284444444444445e-06,
+      "loss": 0.3091,
+      "step": 2175
+    },
+    {
+      "epoch": 1.3902053712480253,
+      "grad_norm": 10.712764739990234,
+      "learning_rate": 6.22888888888889e-06,
+      "loss": 0.3217,
+      "step": 2200
+    },
+    {
+      "epoch": 1.4060031595576619,
+      "grad_norm": 9.865320205688477,
+      "learning_rate": 6.173333333333333e-06,
+      "loss": 0.3341,
+      "step": 2225
+    },
+    {
+      "epoch": 1.4218009478672986,
+      "grad_norm": 11.386091232299805,
+      "learning_rate": 6.117777777777778e-06,
+      "loss": 0.308,
+      "step": 2250
+    },
+    {
+      "epoch": 1.4375987361769353,
+      "grad_norm": 10.972213745117188,
+      "learning_rate": 6.062222222222223e-06,
+      "loss": 0.3184,
+      "step": 2275
+    },
+    {
+      "epoch": 1.4533965244865719,
+      "grad_norm": 9.059267044067383,
+      "learning_rate": 6.006666666666667e-06,
+      "loss": 0.3285,
+      "step": 2300
+    },
+    {
+      "epoch": 1.4691943127962086,
+      "grad_norm": 10.708878517150879,
+      "learning_rate": 5.951111111111112e-06,
+      "loss": 0.288,
+      "step": 2325
+    },
+    {
+      "epoch": 1.4849921011058451,
+      "grad_norm": 9.580412864685059,
+      "learning_rate": 5.895555555555557e-06,
+      "loss": 0.3133,
+      "step": 2350
+    },
+    {
+      "epoch": 1.5007898894154819,
+      "grad_norm": 13.611145973205566,
+      "learning_rate": 5.84e-06,
+      "loss": 0.3398,
+      "step": 2375
+    },
+    {
+      "epoch": 1.5165876777251186,
+      "grad_norm": 11.563826560974121,
+      "learning_rate": 5.784444444444445e-06,
+      "loss": 0.3187,
+      "step": 2400
+    },
+    {
+      "epoch": 1.5323854660347551,
+      "grad_norm": 11.612549781799316,
+      "learning_rate": 5.72888888888889e-06,
+      "loss": 0.3169,
+      "step": 2425
+    },
+    {
+      "epoch": 1.5481832543443916,
+      "grad_norm": 9.463521003723145,
+      "learning_rate": 5.673333333333334e-06,
+      "loss": 0.3124,
+      "step": 2450
+    },
+    {
+      "epoch": 1.5639810426540284,
+      "grad_norm": 14.442208290100098,
+      "learning_rate": 5.617777777777779e-06,
+      "loss": 0.3202,
+      "step": 2475
+    },
+    {
+      "epoch": 1.5797788309636651,
+      "grad_norm": 11.377900123596191,
+      "learning_rate": 5.562222222222222e-06,
+      "loss": 0.317,
+      "step": 2500
+    },
+    {
+      "epoch": 1.5955766192733019,
+      "grad_norm": 11.9774808883667,
+      "learning_rate": 5.506666666666667e-06,
+      "loss": 0.3188,
+      "step": 2525
+    },
+    {
+      "epoch": 1.6113744075829384,
+      "grad_norm": 11.492107391357422,
+      "learning_rate": 5.451111111111112e-06,
+      "loss": 0.3109,
+      "step": 2550
+    },
+    {
+      "epoch": 1.627172195892575,
+      "grad_norm": 12.987119674682617,
+      "learning_rate": 5.3955555555555565e-06,
+      "loss": 0.3127,
+      "step": 2575
+    },
+    {
+      "epoch": 1.6429699842022116,
+      "grad_norm": 11.403392791748047,
+      "learning_rate": 5.3400000000000005e-06,
+      "loss": 0.3007,
+      "step": 2600
+    },
+    {
+      "epoch": 1.6587677725118484,
+      "grad_norm": 10.354674339294434,
+      "learning_rate": 5.2844444444444454e-06,
+      "loss": 0.3129,
+      "step": 2625
+    },
+    {
+      "epoch": 1.674565560821485,
+      "grad_norm": 9.936201095581055,
+      "learning_rate": 5.228888888888889e-06,
+      "loss": 0.3226,
+      "step": 2650
+    },
+    {
+      "epoch": 1.6903633491311216,
+      "grad_norm": 10.426243782043457,
+      "learning_rate": 5.1733333333333335e-06,
+      "loss": 0.3021,
+      "step": 2675
+    },
+    {
+      "epoch": 1.7061611374407581,
+      "grad_norm": 10.784858703613281,
+      "learning_rate": 5.117777777777778e-06,
+      "loss": 0.2923,
+      "step": 2700
+    },
+    {
+      "epoch": 1.7219589257503949,
+      "grad_norm": 10.411828994750977,
+      "learning_rate": 5.062222222222222e-06,
+      "loss": 0.2893,
+      "step": 2725
+    },
+    {
+      "epoch": 1.7377567140600316,
+      "grad_norm": 12.027934074401855,
+      "learning_rate": 5.006666666666667e-06,
+      "loss": 0.3136,
+      "step": 2750
+    },
+    {
+      "epoch": 1.7535545023696684,
+      "grad_norm": 10.067774772644043,
+      "learning_rate": 4.951111111111111e-06,
+      "loss": 0.296,
+      "step": 2775
+    },
+    {
+      "epoch": 1.7693522906793049,
+      "grad_norm": 10.396674156188965,
+      "learning_rate": 4.895555555555556e-06,
+      "loss": 0.2973,
+      "step": 2800
+    },
+    {
+      "epoch": 1.7851500789889414,
+      "grad_norm": 9.719764709472656,
+      "learning_rate": 4.84e-06,
+      "loss": 0.2831,
+      "step": 2825
+    },
+    {
+      "epoch": 1.8009478672985781,
+      "grad_norm": 11.552470207214355,
+      "learning_rate": 4.784444444444445e-06,
+      "loss": 0.2802,
+      "step": 2850
+    },
+    {
+      "epoch": 1.8167456556082149,
+      "grad_norm": 10.932677268981934,
+      "learning_rate": 4.728888888888889e-06,
+      "loss": 0.3189,
+      "step": 2875
+    },
+    {
+      "epoch": 1.8325434439178516,
+      "grad_norm": 12.281967163085938,
+      "learning_rate": 4.673333333333333e-06,
+      "loss": 0.3014,
+      "step": 2900
+    },
+    {
+      "epoch": 1.8483412322274881,
+      "grad_norm": 12.78361988067627,
+      "learning_rate": 4.617777777777778e-06,
+      "loss": 0.3265,
+      "step": 2925
+    },
+    {
+      "epoch": 1.8641390205371247,
+      "grad_norm": 11.523568153381348,
+      "learning_rate": 4.562222222222222e-06,
+      "loss": 0.3062,
+      "step": 2950
+    },
+    {
+      "epoch": 1.8799368088467614,
+      "grad_norm": 12.009855270385742,
+      "learning_rate": 4.506666666666667e-06,
+      "loss": 0.294,
+      "step": 2975
+    },
+    {
+      "epoch": 1.8957345971563981,
+      "grad_norm": 11.980591773986816,
+      "learning_rate": 4.451111111111112e-06,
+      "loss": 0.2997,
+      "step": 3000
+    },
+    {
+      "epoch": 1.8957345971563981,
+      "eval_loss": 0.420254111289978,
+      "eval_runtime": 1304.4467,
+      "eval_samples_per_second": 2.991,
+      "eval_steps_per_second": 0.187,
+      "eval_wer": 0.30416304452815607,
+      "step": 3000
+    },
+    {
+      "epoch": 1.9115323854660349,
+      "grad_norm": 9.556490898132324,
+      "learning_rate": 4.395555555555556e-06,
+      "loss": 0.3128,
+      "step": 3025
+    },
+    {
+      "epoch": 1.9273301737756714,
+      "grad_norm": 12.445517539978027,
+      "learning_rate": 4.34e-06,
+      "loss": 0.2898,
+      "step": 3050
+    },
+    {
+      "epoch": 1.943127962085308,
+      "grad_norm": 11.485321998596191,
+      "learning_rate": 4.284444444444445e-06,
+      "loss": 0.2922,
+      "step": 3075
+    },
+    {
+      "epoch": 1.9589257503949447,
+      "grad_norm": 11.148347854614258,
+      "learning_rate": 4.228888888888889e-06,
+      "loss": 0.2941,
+      "step": 3100
+    },
+    {
+      "epoch": 1.9747235387045814,
+      "grad_norm": 8.813661575317383,
+      "learning_rate": 4.173333333333334e-06,
+      "loss": 0.2738,
+      "step": 3125
+    },
+    {
+      "epoch": 1.9905213270142181,
+      "grad_norm": 9.704155921936035,
+      "learning_rate": 4.117777777777779e-06,
+      "loss": 0.3064,
+      "step": 3150
+    },
+    {
+      "epoch": 2.006319115323855,
+      "grad_norm": 8.704689979553223,
+      "learning_rate": 4.062222222222223e-06,
+      "loss": 0.2478,
+      "step": 3175
+    },
+    {
+      "epoch": 2.022116903633491,
+      "grad_norm": 10.47323226928711,
+      "learning_rate": 4.006666666666667e-06,
+      "loss": 0.1972,
+      "step": 3200
+    },
+    {
+      "epoch": 2.037914691943128,
+      "grad_norm": 9.401123046875,
+      "learning_rate": 3.953333333333333e-06,
+      "loss": 0.2048,
+      "step": 3225
+    },
+    {
+      "epoch": 2.0537124802527646,
+      "grad_norm": 8.968587875366211,
+      "learning_rate": 3.897777777777778e-06,
+      "loss": 0.2047,
+      "step": 3250
+    },
+    {
+      "epoch": 2.0695102685624014,
+      "grad_norm": 10.138334274291992,
+      "learning_rate": 3.842222222222223e-06,
+      "loss": 0.2041,
+      "step": 3275
+    },
+    {
+      "epoch": 2.085308056872038,
+      "grad_norm": 9.157357215881348,
+      "learning_rate": 3.7866666666666667e-06,
+      "loss": 0.1983,
+      "step": 3300
+    },
+    {
+      "epoch": 2.1011058451816744,
+      "grad_norm": 11.541014671325684,
+      "learning_rate": 3.7311111111111116e-06,
+      "loss": 0.2102,
+      "step": 3325
+    },
+    {
+      "epoch": 2.116903633491311,
+      "grad_norm": 13.219682693481445,
+      "learning_rate": 3.675555555555556e-06,
+      "loss": 0.2032,
+      "step": 3350
+    },
+    {
+      "epoch": 2.132701421800948,
+      "grad_norm": 9.11551284790039,
+      "learning_rate": 3.62e-06,
+      "loss": 0.183,
+      "step": 3375
+    },
+    {
+      "epoch": 2.1484992101105846,
+      "grad_norm": 7.927098751068115,
+      "learning_rate": 3.564444444444445e-06,
+      "loss": 0.2022,
+      "step": 3400
+    },
+    {
+      "epoch": 2.1642969984202214,
+      "grad_norm": 10.464715003967285,
+      "learning_rate": 3.508888888888889e-06,
+      "loss": 0.1877,
+      "step": 3425
+    },
+    {
+      "epoch": 2.1800947867298577,
+      "grad_norm": 8.424384117126465,
+      "learning_rate": 3.4533333333333334e-06,
+      "loss": 0.1879,
+      "step": 3450
+    },
+    {
+      "epoch": 2.1958925750394944,
+      "grad_norm": 7.617179870605469,
+      "learning_rate": 3.3977777777777783e-06,
+      "loss": 0.1973,
+      "step": 3475
+    },
+    {
+      "epoch": 2.211690363349131,
+      "grad_norm": 9.299885749816895,
+      "learning_rate": 3.3422222222222224e-06,
+      "loss": 0.1982,
+      "step": 3500
+    },
+    {
+      "epoch": 2.227488151658768,
+      "grad_norm": 8.834092140197754,
+      "learning_rate": 3.286666666666667e-06,
+      "loss": 0.2003,
+      "step": 3525
+    },
+    {
+      "epoch": 2.2432859399684046,
+      "grad_norm": 8.07299518585205,
+      "learning_rate": 3.2311111111111117e-06,
+      "loss": 0.1971,
+      "step": 3550
+    },
+    {
+      "epoch": 2.259083728278041,
+      "grad_norm": 10.275826454162598,
+      "learning_rate": 3.1755555555555557e-06,
+      "loss": 0.1914,
+      "step": 3575
+    },
+    {
+      "epoch": 2.2748815165876777,
+      "grad_norm": 9.910749435424805,
+      "learning_rate": 3.12e-06,
+      "loss": 0.2067,
+      "step": 3600
+    },
+    {
+      "epoch": 2.2906793048973144,
+      "grad_norm": 10.053370475769043,
+      "learning_rate": 3.064444444444445e-06,
+      "loss": 0.2129,
+      "step": 3625
+    },
+    {
+      "epoch": 2.306477093206951,
+      "grad_norm": 10.744956970214844,
+      "learning_rate": 3.008888888888889e-06,
+      "loss": 0.1984,
+      "step": 3650
+    },
+    {
+      "epoch": 2.322274881516588,
+      "grad_norm": 9.880094528198242,
+      "learning_rate": 2.9533333333333336e-06,
+      "loss": 0.1852,
+      "step": 3675
+    },
+    {
+      "epoch": 2.338072669826224,
+      "grad_norm": 10.811684608459473,
+      "learning_rate": 2.8977777777777785e-06,
+      "loss": 0.2019,
+      "step": 3700
+    },
+    {
+      "epoch": 2.353870458135861,
+      "grad_norm": 12.169087409973145,
+      "learning_rate": 2.8422222222222225e-06,
+      "loss": 0.1944,
+      "step": 3725
+    },
+    {
+      "epoch": 2.3696682464454977,
+      "grad_norm": 10.1768217086792,
+      "learning_rate": 2.786666666666667e-06,
+      "loss": 0.2035,
+      "step": 3750
+    },
+    {
+      "epoch": 2.3854660347551344,
+      "grad_norm": 8.389801979064941,
+      "learning_rate": 2.7311111111111114e-06,
+      "loss": 0.2096,
+      "step": 3775
+    },
+    {
+      "epoch": 2.401263823064771,
+      "grad_norm": 9.511481285095215,
+      "learning_rate": 2.675555555555556e-06,
+      "loss": 0.1868,
+      "step": 3800
+    },
+    {
+      "epoch": 2.4170616113744074,
+      "grad_norm": 10.304895401000977,
+      "learning_rate": 2.6200000000000003e-06,
+      "loss": 0.2023,
+      "step": 3825
+    },
+    {
+      "epoch": 2.432859399684044,
+      "grad_norm": 11.822694778442383,
+      "learning_rate": 2.5644444444444444e-06,
+      "loss": 0.1938,
+      "step": 3850
+    },
+    {
+      "epoch": 2.448657187993681,
+      "grad_norm": 10.087789535522461,
+      "learning_rate": 2.5088888888888892e-06,
+      "loss": 0.1987,
+      "step": 3875
+    },
+    {
+      "epoch": 2.4644549763033177,
+      "grad_norm": 8.504409790039062,
+      "learning_rate": 2.4533333333333333e-06,
+      "loss": 0.1845,
+      "step": 3900
+    },
+    {
+      "epoch": 2.4802527646129544,
+      "grad_norm": 9.70301342010498,
+      "learning_rate": 2.397777777777778e-06,
+      "loss": 0.1832,
+      "step": 3925
+    },
+    {
+      "epoch": 2.4960505529225907,
+      "grad_norm": 9.534614562988281,
+      "learning_rate": 2.342222222222222e-06,
+      "loss": 0.182,
+      "step": 3950
+    },
+    {
+      "epoch": 2.5118483412322274,
+      "grad_norm": 8.997177124023438,
+      "learning_rate": 2.2866666666666667e-06,
+      "loss": 0.205,
+      "step": 3975
+    },
+    {
+      "epoch": 2.527646129541864,
+      "grad_norm": 9.527833938598633,
+      "learning_rate": 2.2311111111111115e-06,
+      "loss": 0.1851,
+      "step": 4000
+    },
+    {
+      "epoch": 2.527646129541864,
+      "eval_loss": 0.41017213463783264,
+      "eval_runtime": 1287.8118,
+      "eval_samples_per_second": 3.029,
+      "eval_steps_per_second": 0.189,
+      "eval_wer": 0.2870182555780933,
+      "step": 4000
+    },
+    {
+      "epoch": 2.543443917851501,
+      "grad_norm": 8.116750717163086,
+      "learning_rate": 2.1755555555555556e-06,
+      "loss": 0.1937,
+      "step": 4025
+    },
+    {
+      "epoch": 2.5592417061611377,
+      "grad_norm": 11.475099563598633,
+      "learning_rate": 2.12e-06,
+      "loss": 0.1795,
+      "step": 4050
+    },
+    {
+      "epoch": 2.575039494470774,
+      "grad_norm": 7.422430038452148,
+      "learning_rate": 2.064444444444445e-06,
+      "loss": 0.2029,
+      "step": 4075
+    },
+    {
+      "epoch": 2.5908372827804107,
+      "grad_norm": 8.631953239440918,
+      "learning_rate": 2.008888888888889e-06,
+      "loss": 0.1883,
+      "step": 4100
+    },
+    {
+      "epoch": 2.6066350710900474,
+      "grad_norm": 9.813713073730469,
+      "learning_rate": 1.9533333333333334e-06,
+      "loss": 0.1901,
+      "step": 4125
+    },
+    {
+      "epoch": 2.622432859399684,
+      "grad_norm": 10.284896850585938,
+      "learning_rate": 1.8977777777777779e-06,
+      "loss": 0.183,
+      "step": 4150
+    },
+    {
+      "epoch": 2.638230647709321,
+      "grad_norm": 9.403543472290039,
+      "learning_rate": 1.8422222222222225e-06,
+      "loss": 0.1892,
+      "step": 4175
+    },
+    {
+      "epoch": 2.654028436018957,
+      "grad_norm": 9.446948051452637,
+      "learning_rate": 1.7866666666666668e-06,
+      "loss": 0.1886,
+      "step": 4200
+    },
+    {
+      "epoch": 2.669826224328594,
+      "grad_norm": 10.583983421325684,
+      "learning_rate": 1.7311111111111112e-06,
+      "loss": 0.2211,
+      "step": 4225
+    },
+    {
+      "epoch": 2.6856240126382307,
+      "grad_norm": 10.528802871704102,
+      "learning_rate": 1.675555555555556e-06,
+      "loss": 0.1937,
+      "step": 4250
+    },
+    {
+      "epoch": 2.7014218009478674,
+      "grad_norm": 8.71202278137207,
+      "learning_rate": 1.6200000000000002e-06,
+      "loss": 0.1936,
+      "step": 4275
+    },
+    {
+      "epoch": 2.717219589257504,
+      "grad_norm": 8.44046401977539,
+      "learning_rate": 1.5644444444444446e-06,
+      "loss": 0.1994,
+      "step": 4300
+    },
+    {
+      "epoch": 2.7330173775671405,
+      "grad_norm": 9.856565475463867,
+      "learning_rate": 1.5088888888888889e-06,
+      "loss": 0.1753,
+      "step": 4325
+    },
+    {
+      "epoch": 2.748815165876777,
+      "grad_norm": 9.842414855957031,
+      "learning_rate": 1.4533333333333335e-06,
+      "loss": 0.1815,
+      "step": 4350
+    },
+    {
+      "epoch": 2.764612954186414,
+      "grad_norm": 9.333725929260254,
+      "learning_rate": 1.397777777777778e-06,
+      "loss": 0.1851,
+      "step": 4375
+    },
+    {
+      "epoch": 2.7804107424960507,
+      "grad_norm": 9.97825813293457,
+      "learning_rate": 1.3422222222222222e-06,
+      "loss": 0.1815,
+      "step": 4400
+    },
+    {
+      "epoch": 2.7962085308056874,
+      "grad_norm": 9.474321365356445,
+      "learning_rate": 1.286666666666667e-06,
+      "loss": 0.1792,
+      "step": 4425
+    },
+    {
+      "epoch": 2.8120063191153237,
+      "grad_norm": 8.71677303314209,
+      "learning_rate": 1.2311111111111112e-06,
+      "loss": 0.1921,
+      "step": 4450
+    },
+    {
+      "epoch": 2.8278041074249605,
+      "grad_norm": 9.69323444366455,
+      "learning_rate": 1.1755555555555556e-06,
+      "loss": 0.1778,
+      "step": 4475
+    },
+    {
+      "epoch": 2.843601895734597,
+      "grad_norm": 9.335270881652832,
+      "learning_rate": 1.12e-06,
+      "loss": 0.1783,
+      "step": 4500
+    },
+    {
+      "epoch": 2.859399684044234,
+      "grad_norm": 8.661324501037598,
+      "learning_rate": 1.0644444444444445e-06,
+      "loss": 0.1689,
+      "step": 4525
+    },
+    {
+      "epoch": 2.8751974723538707,
+      "grad_norm": 9.32027530670166,
+      "learning_rate": 1.008888888888889e-06,
+      "loss": 0.1772,
+      "step": 4550
+    },
+    {
+      "epoch": 2.890995260663507,
+      "grad_norm": 8.178330421447754,
+      "learning_rate": 9.533333333333335e-07,
+      "loss": 0.1806,
+      "step": 4575
+    },
+    {
+      "epoch": 2.9067930489731437,
+      "grad_norm": 9.38011646270752,
+      "learning_rate": 8.977777777777778e-07,
+      "loss": 0.1753,
+      "step": 4600
+    },
+    {
+      "epoch": 2.9225908372827805,
+      "grad_norm": 9.022958755493164,
+      "learning_rate": 8.422222222222224e-07,
+      "loss": 0.1813,
+      "step": 4625
+    },
+    {
+      "epoch": 2.938388625592417,
+      "grad_norm": 9.93110466003418,
+      "learning_rate": 7.866666666666667e-07,
+      "loss": 0.1825,
+      "step": 4650
+    },
+    {
+      "epoch": 2.954186413902054,
+      "grad_norm": 9.306452751159668,
+      "learning_rate": 7.311111111111112e-07,
+      "loss": 0.1943,
+      "step": 4675
+    },
+    {
+      "epoch": 2.9699842022116902,
+      "grad_norm": 7.849850177764893,
+      "learning_rate": 6.755555555555555e-07,
+      "loss": 0.1696,
+      "step": 4700
+    },
+    {
+      "epoch": 2.985781990521327,
+      "grad_norm": 10.920326232910156,
+      "learning_rate": 6.200000000000001e-07,
+      "loss": 0.1788,
+      "step": 4725
+    },
+    {
+      "epoch": 3.0015797788309637,
+      "grad_norm": 7.569627285003662,
+      "learning_rate": 5.644444444444445e-07,
+      "loss": 0.1698,
+      "step": 4750
+    },
+    {
+      "epoch": 3.0173775671406005,
+      "grad_norm": 8.557785987854004,
+      "learning_rate": 5.088888888888889e-07,
+      "loss": 0.1323,
+      "step": 4775
+    },
+    {
+      "epoch": 3.0331753554502368,
+      "grad_norm": 7.7341132164001465,
+      "learning_rate": 4.533333333333334e-07,
+      "loss": 0.1288,
+      "step": 4800
+    },
+    {
+      "epoch": 3.0489731437598735,
+      "grad_norm": 6.455957889556885,
+      "learning_rate": 3.9777777777777783e-07,
+      "loss": 0.1253,
+      "step": 4825
+    },
+    {
+      "epoch": 3.0647709320695102,
+      "grad_norm": 9.151886940002441,
+      "learning_rate": 3.422222222222223e-07,
+      "loss": 0.1369,
+      "step": 4850
+    },
+    {
+      "epoch": 3.080568720379147,
+      "grad_norm": 8.11299991607666,
+      "learning_rate": 2.866666666666667e-07,
+      "loss": 0.1272,
+      "step": 4875
+    },
+    {
+      "epoch": 3.0963665086887837,
+      "grad_norm": 6.720188617706299,
+      "learning_rate": 2.3111111111111112e-07,
+      "loss": 0.1329,
+      "step": 4900
+    },
+    {
+      "epoch": 3.11216429699842,
+      "grad_norm": 7.401381969451904,
+      "learning_rate": 1.7555555555555558e-07,
+      "loss": 0.1257,
+      "step": 4925
+    },
+    {
+      "epoch": 3.1279620853080567,
+      "grad_norm": 7.703917026519775,
+      "learning_rate": 1.2000000000000002e-07,
+      "loss": 0.1224,
+      "step": 4950
+    },
+    {
+      "epoch": 3.1437598736176935,
+      "grad_norm": 7.16009521484375,
+      "learning_rate": 6.444444444444445e-08,
+      "loss": 0.1296,
+      "step": 4975
+    },
+    {
+      "epoch": 3.1595576619273302,
+      "grad_norm": 7.600194931030273,
+      "learning_rate": 8.88888888888889e-09,
+      "loss": 0.1313,
+      "step": 5000
+    },
+    {
+      "epoch": 3.1595576619273302,
+      "eval_loss": 0.4071974754333496,
+      "eval_runtime": 1289.8235,
+      "eval_samples_per_second": 3.024,
+      "eval_steps_per_second": 0.189,
+      "eval_wer": 0.2801361924079977,
+      "step": 5000
+    },
+    {
+      "epoch": 3.1595576619273302,
+      "step": 5000,
+      "total_flos": 2.727921844224e+20,
+      "train_loss": 0.3573408980369568,
+      "train_runtime": 57234.8416,
+      "train_samples_per_second": 2.796,
+      "train_steps_per_second": 0.087
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 5000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.727921844224e+20,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}