End of training

Browse files

Files changed (7) hide show

README.md +19 -6
all_results.json +13 -0
eval_results.json +8 -0
train_results.json +8 -0
trainer_state.json +2932 -0
wandb/run-20250217_214618-7bygcjmf/files/output.log +158 -0
wandb/run-20250217_214618-7bygcjmf/run-7bygcjmf.wandb +2 -2

README.md CHANGED Viewed

@@ -3,23 +3,36 @@ library_name: transformers
 license: apache-2.0
 base_model: openai/whisper-tiny
 tags:
 - generated_from_trainer
 metrics:
 - wer
 model-index:
-- name: openai/whisper-tiny
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# openai/whisper-tiny
-This model is a fine-tuned version of [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2992
-- Wer: 15.7240
 ## Model description

 license: apache-2.0
 base_model: openai/whisper-tiny
 tags:
+- whisper-event
 - generated_from_trainer
+datasets:
+- asierhv/composite_corpus_eu_v2.1
 metrics:
 - wer
 model-index:
+- name: Whisper Tiny Basque
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: asierhv/composite_corpus_eu_v2.1
+      type: asierhv/composite_corpus_eu_v2.1
+    metrics:
+    - name: Wer
+      type: wer
+      value: 14.985509956062447
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# Whisper Tiny Basque
+This model is a fine-tuned version of [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny) on the asierhv/composite_corpus_eu_v2.1 dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.3002
+- Wer: 14.9855
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 0.300187349319458,
+    "eval_runtime": 56.9322,
+    "eval_samples_per_second": 36.956,
+    "eval_steps_per_second": 2.319,
+    "eval_wer": 14.985509956062447,
+    "total_flos": 7.8780432384e+18,
+    "train_loss": 0.24547564173936845,
+    "train_runtime": 3924.8092,
+    "train_samples_per_second": 81.533,
+    "train_steps_per_second": 2.548
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 0.300187349319458,
+    "eval_runtime": 56.9322,
+    "eval_samples_per_second": 36.956,
+    "eval_steps_per_second": 2.319,
+    "eval_wer": 14.985509956062447
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "total_flos": 7.8780432384e+18,
+    "train_loss": 0.24547564173936845,
+    "train_runtime": 3924.8092,
+    "train_samples_per_second": 81.533,
+    "train_steps_per_second": 2.548
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,2932 @@

+{
+  "best_metric": 14.985509956062447,
+  "best_model_checkpoint": "./checkpoint-8000",
+  "epoch": 1.0,
+  "eval_steps": 1000,
+  "global_step": 10000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0025,
+      "grad_norm": 32.178646087646484,
+      "learning_rate": 7.875e-07,
+      "loss": 3.5417,
+      "step": 25
+    },
+    {
+      "epoch": 0.005,
+      "grad_norm": 17.33567237854004,
+      "learning_rate": 1.7249999999999998e-06,
+      "loss": 2.8499,
+      "step": 50
+    },
+    {
+      "epoch": 0.0075,
+      "grad_norm": 13.790657043457031,
+      "learning_rate": 2.6624999999999995e-06,
+      "loss": 1.9843,
+      "step": 75
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 10.804696083068848,
+      "learning_rate": 3.6e-06,
+      "loss": 1.6114,
+      "step": 100
+    },
+    {
+      "epoch": 0.0125,
+      "grad_norm": 9.405135154724121,
+      "learning_rate": 4.537499999999999e-06,
+      "loss": 1.3704,
+      "step": 125
+    },
+    {
+      "epoch": 0.015,
+      "grad_norm": 8.391704559326172,
+      "learning_rate": 5.474999999999999e-06,
+      "loss": 1.1665,
+      "step": 150
+    },
+    {
+      "epoch": 0.0175,
+      "grad_norm": 9.137776374816895,
+      "learning_rate": 6.4125e-06,
+      "loss": 1.0771,
+      "step": 175
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 9.398157119750977,
+      "learning_rate": 7.35e-06,
+      "loss": 1.0148,
+      "step": 200
+    },
+    {
+      "epoch": 0.0225,
+      "grad_norm": 7.459103107452393,
+      "learning_rate": 8.2875e-06,
+      "loss": 0.8874,
+      "step": 225
+    },
+    {
+      "epoch": 0.025,
+      "grad_norm": 10.148730278015137,
+      "learning_rate": 9.224999999999999e-06,
+      "loss": 0.8913,
+      "step": 250
+    },
+    {
+      "epoch": 0.0275,
+      "grad_norm": 8.423101425170898,
+      "learning_rate": 1.01625e-05,
+      "loss": 0.8811,
+      "step": 275
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 9.672224998474121,
+      "learning_rate": 1.1099999999999999e-05,
+      "loss": 0.8374,
+      "step": 300
+    },
+    {
+      "epoch": 0.0325,
+      "grad_norm": 7.847349166870117,
+      "learning_rate": 1.20375e-05,
+      "loss": 0.7848,
+      "step": 325
+    },
+    {
+      "epoch": 0.035,
+      "grad_norm": 7.796125888824463,
+      "learning_rate": 1.2974999999999999e-05,
+      "loss": 0.7056,
+      "step": 350
+    },
+    {
+      "epoch": 0.0375,
+      "grad_norm": 8.002777099609375,
+      "learning_rate": 1.39125e-05,
+      "loss": 0.6117,
+      "step": 375
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 6.5904340744018555,
+      "learning_rate": 1.485e-05,
+      "loss": 0.584,
+      "step": 400
+    },
+    {
+      "epoch": 0.0425,
+      "grad_norm": 7.075273513793945,
+      "learning_rate": 1.5787499999999997e-05,
+      "loss": 0.5353,
+      "step": 425
+    },
+    {
+      "epoch": 0.045,
+      "grad_norm": 6.499445915222168,
+      "learning_rate": 1.6725e-05,
+      "loss": 0.5099,
+      "step": 450
+    },
+    {
+      "epoch": 0.0475,
+      "grad_norm": 6.824000358581543,
+      "learning_rate": 1.76625e-05,
+      "loss": 0.4846,
+      "step": 475
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 6.962899208068848,
+      "learning_rate": 1.8599999999999998e-05,
+      "loss": 0.4592,
+      "step": 500
+    },
+    {
+      "epoch": 0.0525,
+      "grad_norm": 7.1536030769348145,
+      "learning_rate": 1.95375e-05,
+      "loss": 0.4266,
+      "step": 525
+    },
+    {
+      "epoch": 0.055,
+      "grad_norm": 5.634274005889893,
+      "learning_rate": 2.0475e-05,
+      "loss": 0.4271,
+      "step": 550
+    },
+    {
+      "epoch": 0.0575,
+      "grad_norm": 5.969743728637695,
+      "learning_rate": 2.1412499999999995e-05,
+      "loss": 0.4049,
+      "step": 575
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 8.386860847473145,
+      "learning_rate": 2.2349999999999998e-05,
+      "loss": 0.3963,
+      "step": 600
+    },
+    {
+      "epoch": 0.0625,
+      "grad_norm": 6.4440083503723145,
+      "learning_rate": 2.3287499999999997e-05,
+      "loss": 0.3566,
+      "step": 625
+    },
+    {
+      "epoch": 0.065,
+      "grad_norm": 5.125731468200684,
+      "learning_rate": 2.4225e-05,
+      "loss": 0.3747,
+      "step": 650
+    },
+    {
+      "epoch": 0.0675,
+      "grad_norm": 5.34471321105957,
+      "learning_rate": 2.51625e-05,
+      "loss": 0.3635,
+      "step": 675
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 6.247539520263672,
+      "learning_rate": 2.6099999999999997e-05,
+      "loss": 0.3597,
+      "step": 700
+    },
+    {
+      "epoch": 0.0725,
+      "grad_norm": 8.403606414794922,
+      "learning_rate": 2.7037499999999997e-05,
+      "loss": 0.3993,
+      "step": 725
+    },
+    {
+      "epoch": 0.075,
+      "grad_norm": 6.951033115386963,
+      "learning_rate": 2.7975e-05,
+      "loss": 0.5305,
+      "step": 750
+    },
+    {
+      "epoch": 0.0775,
+      "grad_norm": 6.673659324645996,
+      "learning_rate": 2.8912499999999998e-05,
+      "loss": 0.5134,
+      "step": 775
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 7.272465229034424,
+      "learning_rate": 2.985e-05,
+      "loss": 0.5594,
+      "step": 800
+    },
+    {
+      "epoch": 0.0825,
+      "grad_norm": 5.904059410095215,
+      "learning_rate": 3.0787499999999996e-05,
+      "loss": 0.3991,
+      "step": 825
+    },
+    {
+      "epoch": 0.085,
+      "grad_norm": 4.807435035705566,
+      "learning_rate": 3.1725e-05,
+      "loss": 0.3297,
+      "step": 850
+    },
+    {
+      "epoch": 0.0875,
+      "grad_norm": 4.8213419914245605,
+      "learning_rate": 3.2662499999999994e-05,
+      "loss": 0.3153,
+      "step": 875
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 6.322027206420898,
+      "learning_rate": 3.36e-05,
+      "loss": 0.4047,
+      "step": 900
+    },
+    {
+      "epoch": 0.0925,
+      "grad_norm": 7.58137321472168,
+      "learning_rate": 3.45375e-05,
+      "loss": 0.4779,
+      "step": 925
+    },
+    {
+      "epoch": 0.095,
+      "grad_norm": 6.797820568084717,
+      "learning_rate": 3.5474999999999995e-05,
+      "loss": 0.458,
+      "step": 950
+    },
+    {
+      "epoch": 0.0975,
+      "grad_norm": 7.3672332763671875,
+      "learning_rate": 3.64125e-05,
+      "loss": 0.6595,
+      "step": 975
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 7.559723854064941,
+      "learning_rate": 3.735e-05,
+      "loss": 0.5862,
+      "step": 1000
+    },
+    {
+      "epoch": 0.1,
+      "eval_loss": 0.5658391118049622,
+      "eval_runtime": 61.7179,
+      "eval_samples_per_second": 34.091,
+      "eval_steps_per_second": 2.139,
+      "eval_wer": 33.845938113489765,
+      "step": 1000
+    },
+    {
+      "epoch": 0.1025,
+      "grad_norm": 6.748581409454346,
+      "learning_rate": 3.74125e-05,
+      "loss": 0.5033,
+      "step": 1025
+    },
+    {
+      "epoch": 0.105,
+      "grad_norm": 5.543668270111084,
+      "learning_rate": 3.730833333333333e-05,
+      "loss": 0.3956,
+      "step": 1050
+    },
+    {
+      "epoch": 0.1075,
+      "grad_norm": 4.64129638671875,
+      "learning_rate": 3.7204166666666665e-05,
+      "loss": 0.3021,
+      "step": 1075
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 4.491319179534912,
+      "learning_rate": 3.7099999999999994e-05,
+      "loss": 0.2538,
+      "step": 1100
+    },
+    {
+      "epoch": 0.1125,
+      "grad_norm": 4.252546787261963,
+      "learning_rate": 3.699583333333333e-05,
+      "loss": 0.2439,
+      "step": 1125
+    },
+    {
+      "epoch": 0.115,
+      "grad_norm": 4.188290119171143,
+      "learning_rate": 3.6891666666666664e-05,
+      "loss": 0.2602,
+      "step": 1150
+    },
+    {
+      "epoch": 0.1175,
+      "grad_norm": 3.9564859867095947,
+      "learning_rate": 3.678749999999999e-05,
+      "loss": 0.2595,
+      "step": 1175
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 4.673700332641602,
+      "learning_rate": 3.668333333333333e-05,
+      "loss": 0.2423,
+      "step": 1200
+    },
+    {
+      "epoch": 0.1225,
+      "grad_norm": 6.941758632659912,
+      "learning_rate": 3.6579166666666664e-05,
+      "loss": 0.3696,
+      "step": 1225
+    },
+    {
+      "epoch": 0.125,
+      "grad_norm": 6.368224143981934,
+      "learning_rate": 3.6475e-05,
+      "loss": 0.3809,
+      "step": 1250
+    },
+    {
+      "epoch": 0.1275,
+      "grad_norm": 6.675568103790283,
+      "learning_rate": 3.6370833333333334e-05,
+      "loss": 0.4108,
+      "step": 1275
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 7.435705661773682,
+      "learning_rate": 3.626666666666666e-05,
+      "loss": 0.4293,
+      "step": 1300
+    },
+    {
+      "epoch": 0.1325,
+      "grad_norm": 6.09748649597168,
+      "learning_rate": 3.61625e-05,
+      "loss": 0.3692,
+      "step": 1325
+    },
+    {
+      "epoch": 0.135,
+      "grad_norm": 6.301654815673828,
+      "learning_rate": 3.6058333333333333e-05,
+      "loss": 0.3756,
+      "step": 1350
+    },
+    {
+      "epoch": 0.1375,
+      "grad_norm": 6.4854302406311035,
+      "learning_rate": 3.595416666666666e-05,
+      "loss": 0.3569,
+      "step": 1375
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 6.333820343017578,
+      "learning_rate": 3.585e-05,
+      "loss": 0.3437,
+      "step": 1400
+    },
+    {
+      "epoch": 0.1425,
+      "grad_norm": 6.029584884643555,
+      "learning_rate": 3.5745833333333326e-05,
+      "loss": 0.3517,
+      "step": 1425
+    },
+    {
+      "epoch": 0.145,
+      "grad_norm": 4.968661308288574,
+      "learning_rate": 3.564166666666666e-05,
+      "loss": 0.3311,
+      "step": 1450
+    },
+    {
+      "epoch": 0.1475,
+      "grad_norm": 5.426311492919922,
+      "learning_rate": 3.5537499999999996e-05,
+      "loss": 0.2655,
+      "step": 1475
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 5.133101463317871,
+      "learning_rate": 3.543333333333333e-05,
+      "loss": 0.235,
+      "step": 1500
+    },
+    {
+      "epoch": 0.1525,
+      "grad_norm": 3.7912516593933105,
+      "learning_rate": 3.532916666666667e-05,
+      "loss": 0.2387,
+      "step": 1525
+    },
+    {
+      "epoch": 0.155,
+      "grad_norm": 4.808184623718262,
+      "learning_rate": 3.5224999999999996e-05,
+      "loss": 0.2317,
+      "step": 1550
+    },
+    {
+      "epoch": 0.1575,
+      "grad_norm": 4.065270900726318,
+      "learning_rate": 3.512083333333333e-05,
+      "loss": 0.2045,
+      "step": 1575
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 4.333083152770996,
+      "learning_rate": 3.5016666666666666e-05,
+      "loss": 0.2146,
+      "step": 1600
+    },
+    {
+      "epoch": 0.1625,
+      "grad_norm": 4.882490634918213,
+      "learning_rate": 3.49125e-05,
+      "loss": 0.3059,
+      "step": 1625
+    },
+    {
+      "epoch": 0.165,
+      "grad_norm": 5.470846652984619,
+      "learning_rate": 3.480833333333333e-05,
+      "loss": 0.3189,
+      "step": 1650
+    },
+    {
+      "epoch": 0.1675,
+      "grad_norm": 6.800407409667969,
+      "learning_rate": 3.4704166666666665e-05,
+      "loss": 0.3289,
+      "step": 1675
+    },
+    {
+      "epoch": 0.17,
+      "grad_norm": 4.934418201446533,
+      "learning_rate": 3.4599999999999994e-05,
+      "loss": 0.3128,
+      "step": 1700
+    },
+    {
+      "epoch": 0.1725,
+      "grad_norm": 4.8148908615112305,
+      "learning_rate": 3.449583333333333e-05,
+      "loss": 0.2578,
+      "step": 1725
+    },
+    {
+      "epoch": 0.175,
+      "grad_norm": 4.443181991577148,
+      "learning_rate": 3.4391666666666665e-05,
+      "loss": 0.2424,
+      "step": 1750
+    },
+    {
+      "epoch": 0.1775,
+      "grad_norm": 4.921210289001465,
+      "learning_rate": 3.42875e-05,
+      "loss": 0.2128,
+      "step": 1775
+    },
+    {
+      "epoch": 0.18,
+      "grad_norm": 4.358342170715332,
+      "learning_rate": 3.418333333333333e-05,
+      "loss": 0.2187,
+      "step": 1800
+    },
+    {
+      "epoch": 0.1825,
+      "grad_norm": 3.2940409183502197,
+      "learning_rate": 3.4079166666666664e-05,
+      "loss": 0.2061,
+      "step": 1825
+    },
+    {
+      "epoch": 0.185,
+      "grad_norm": 3.2484359741210938,
+      "learning_rate": 3.3975e-05,
+      "loss": 0.1944,
+      "step": 1850
+    },
+    {
+      "epoch": 0.1875,
+      "grad_norm": 7.2146382331848145,
+      "learning_rate": 3.3870833333333334e-05,
+      "loss": 0.1954,
+      "step": 1875
+    },
+    {
+      "epoch": 0.19,
+      "grad_norm": 3.803097724914551,
+      "learning_rate": 3.376666666666666e-05,
+      "loss": 0.1917,
+      "step": 1900
+    },
+    {
+      "epoch": 0.1925,
+      "grad_norm": 3.597217082977295,
+      "learning_rate": 3.36625e-05,
+      "loss": 0.1843,
+      "step": 1925
+    },
+    {
+      "epoch": 0.195,
+      "grad_norm": 4.769651412963867,
+      "learning_rate": 3.355833333333333e-05,
+      "loss": 0.2574,
+      "step": 1950
+    },
+    {
+      "epoch": 0.1975,
+      "grad_norm": 5.800076484680176,
+      "learning_rate": 3.345416666666666e-05,
+      "loss": 0.3319,
+      "step": 1975
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 5.607431411743164,
+      "learning_rate": 3.335e-05,
+      "loss": 0.314,
+      "step": 2000
+    },
+    {
+      "epoch": 0.2,
+      "eval_loss": 0.43032270669937134,
+      "eval_runtime": 57.5686,
+      "eval_samples_per_second": 36.548,
+      "eval_steps_per_second": 2.293,
+      "eval_wer": 24.98831448069552,
+      "step": 2000
+    },
+    {
+      "epoch": 0.2025,
+      "grad_norm": 3.8960702419281006,
+      "learning_rate": 3.324583333333333e-05,
+      "loss": 0.2659,
+      "step": 2025
+    },
+    {
+      "epoch": 0.205,
+      "grad_norm": 3.8892998695373535,
+      "learning_rate": 3.314166666666666e-05,
+      "loss": 0.1938,
+      "step": 2050
+    },
+    {
+      "epoch": 0.2075,
+      "grad_norm": 4.232676982879639,
+      "learning_rate": 3.30375e-05,
+      "loss": 0.1869,
+      "step": 2075
+    },
+    {
+      "epoch": 0.21,
+      "grad_norm": 3.3340296745300293,
+      "learning_rate": 3.293333333333333e-05,
+      "loss": 0.1826,
+      "step": 2100
+    },
+    {
+      "epoch": 0.2125,
+      "grad_norm": 3.808135747909546,
+      "learning_rate": 3.282916666666667e-05,
+      "loss": 0.1597,
+      "step": 2125
+    },
+    {
+      "epoch": 0.215,
+      "grad_norm": 3.3088366985321045,
+      "learning_rate": 3.2724999999999996e-05,
+      "loss": 0.1626,
+      "step": 2150
+    },
+    {
+      "epoch": 0.2175,
+      "grad_norm": 4.035377502441406,
+      "learning_rate": 3.262083333333333e-05,
+      "loss": 0.1675,
+      "step": 2175
+    },
+    {
+      "epoch": 0.22,
+      "grad_norm": 4.653134346008301,
+      "learning_rate": 3.2516666666666666e-05,
+      "loss": 0.2853,
+      "step": 2200
+    },
+    {
+      "epoch": 0.2225,
+      "grad_norm": 5.569106101989746,
+      "learning_rate": 3.2412499999999995e-05,
+      "loss": 0.2921,
+      "step": 2225
+    },
+    {
+      "epoch": 0.225,
+      "grad_norm": 4.800086975097656,
+      "learning_rate": 3.230833333333333e-05,
+      "loss": 0.3024,
+      "step": 2250
+    },
+    {
+      "epoch": 0.2275,
+      "grad_norm": 4.1314377784729,
+      "learning_rate": 3.2204166666666666e-05,
+      "loss": 0.2125,
+      "step": 2275
+    },
+    {
+      "epoch": 0.23,
+      "grad_norm": 3.3272790908813477,
+      "learning_rate": 3.2099999999999994e-05,
+      "loss": 0.163,
+      "step": 2300
+    },
+    {
+      "epoch": 0.2325,
+      "grad_norm": 3.977968215942383,
+      "learning_rate": 3.199583333333333e-05,
+      "loss": 0.1525,
+      "step": 2325
+    },
+    {
+      "epoch": 0.235,
+      "grad_norm": 5.358455181121826,
+      "learning_rate": 3.1891666666666665e-05,
+      "loss": 0.1544,
+      "step": 2350
+    },
+    {
+      "epoch": 0.2375,
+      "grad_norm": 2.996466875076294,
+      "learning_rate": 3.17875e-05,
+      "loss": 0.1576,
+      "step": 2375
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 3.985736846923828,
+      "learning_rate": 3.168333333333333e-05,
+      "loss": 0.1621,
+      "step": 2400
+    },
+    {
+      "epoch": 0.2425,
+      "grad_norm": 3.0388622283935547,
+      "learning_rate": 3.1579166666666664e-05,
+      "loss": 0.1644,
+      "step": 2425
+    },
+    {
+      "epoch": 0.245,
+      "grad_norm": 2.8498051166534424,
+      "learning_rate": 3.1475e-05,
+      "loss": 0.155,
+      "step": 2450
+    },
+    {
+      "epoch": 0.2475,
+      "grad_norm": 3.7256104946136475,
+      "learning_rate": 3.1370833333333335e-05,
+      "loss": 0.163,
+      "step": 2475
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 3.6672263145446777,
+      "learning_rate": 3.126666666666666e-05,
+      "loss": 0.1859,
+      "step": 2500
+    },
+    {
+      "epoch": 0.2525,
+      "grad_norm": 5.497701644897461,
+      "learning_rate": 3.11625e-05,
+      "loss": 0.2491,
+      "step": 2525
+    },
+    {
+      "epoch": 0.255,
+      "grad_norm": 4.235458850860596,
+      "learning_rate": 3.105833333333333e-05,
+      "loss": 0.2381,
+      "step": 2550
+    },
+    {
+      "epoch": 0.2575,
+      "grad_norm": 4.74775505065918,
+      "learning_rate": 3.095416666666666e-05,
+      "loss": 0.279,
+      "step": 2575
+    },
+    {
+      "epoch": 0.26,
+      "grad_norm": 3.5344150066375732,
+      "learning_rate": 3.085e-05,
+      "loss": 0.2067,
+      "step": 2600
+    },
+    {
+      "epoch": 0.2625,
+      "grad_norm": 3.5317158699035645,
+      "learning_rate": 3.074583333333333e-05,
+      "loss": 0.1741,
+      "step": 2625
+    },
+    {
+      "epoch": 0.265,
+      "grad_norm": 3.141709327697754,
+      "learning_rate": 3.064166666666666e-05,
+      "loss": 0.1565,
+      "step": 2650
+    },
+    {
+      "epoch": 0.2675,
+      "grad_norm": 2.9644110202789307,
+      "learning_rate": 3.05375e-05,
+      "loss": 0.1501,
+      "step": 2675
+    },
+    {
+      "epoch": 0.27,
+      "grad_norm": 3.013913631439209,
+      "learning_rate": 3.0433333333333332e-05,
+      "loss": 0.1584,
+      "step": 2700
+    },
+    {
+      "epoch": 0.2725,
+      "grad_norm": 3.1557493209838867,
+      "learning_rate": 3.0329166666666664e-05,
+      "loss": 0.1685,
+      "step": 2725
+    },
+    {
+      "epoch": 0.275,
+      "grad_norm": 4.391005516052246,
+      "learning_rate": 3.0225e-05,
+      "loss": 0.1813,
+      "step": 2750
+    },
+    {
+      "epoch": 0.2775,
+      "grad_norm": 5.381415367126465,
+      "learning_rate": 3.0120833333333328e-05,
+      "loss": 0.3171,
+      "step": 2775
+    },
+    {
+      "epoch": 0.28,
+      "grad_norm": 4.508987903594971,
+      "learning_rate": 3.0016666666666663e-05,
+      "loss": 0.2927,
+      "step": 2800
+    },
+    {
+      "epoch": 0.2825,
+      "grad_norm": 4.837850570678711,
+      "learning_rate": 2.9912499999999995e-05,
+      "loss": 0.2568,
+      "step": 2825
+    },
+    {
+      "epoch": 0.285,
+      "grad_norm": 4.011997699737549,
+      "learning_rate": 2.980833333333333e-05,
+      "loss": 0.2629,
+      "step": 2850
+    },
+    {
+      "epoch": 0.2875,
+      "grad_norm": 4.157077312469482,
+      "learning_rate": 2.9704166666666662e-05,
+      "loss": 0.2561,
+      "step": 2875
+    },
+    {
+      "epoch": 0.29,
+      "grad_norm": 5.459561347961426,
+      "learning_rate": 2.9599999999999998e-05,
+      "loss": 0.2493,
+      "step": 2900
+    },
+    {
+      "epoch": 0.2925,
+      "grad_norm": 3.367072343826294,
+      "learning_rate": 2.949583333333333e-05,
+      "loss": 0.2006,
+      "step": 2925
+    },
+    {
+      "epoch": 0.295,
+      "grad_norm": 3.317788600921631,
+      "learning_rate": 2.9391666666666665e-05,
+      "loss": 0.1614,
+      "step": 2950
+    },
+    {
+      "epoch": 0.2975,
+      "grad_norm": 4.068172454833984,
+      "learning_rate": 2.9287499999999997e-05,
+      "loss": 0.1683,
+      "step": 2975
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 3.870027542114258,
+      "learning_rate": 2.9183333333333332e-05,
+      "loss": 0.2244,
+      "step": 3000
+    },
+    {
+      "epoch": 0.3,
+      "eval_loss": 0.40985628962516785,
+      "eval_runtime": 58.8739,
+      "eval_samples_per_second": 35.737,
+      "eval_steps_per_second": 2.242,
+      "eval_wer": 21.692998036832755,
+      "step": 3000
+    },
+    {
+      "epoch": 0.3025,
+      "grad_norm": 5.575135231018066,
+      "learning_rate": 2.9079166666666664e-05,
+      "loss": 0.2758,
+      "step": 3025
+    },
+    {
+      "epoch": 0.305,
+      "grad_norm": 6.743279933929443,
+      "learning_rate": 2.8974999999999996e-05,
+      "loss": 0.3198,
+      "step": 3050
+    },
+    {
+      "epoch": 0.3075,
+      "grad_norm": 3.2028794288635254,
+      "learning_rate": 2.8870833333333328e-05,
+      "loss": 0.2196,
+      "step": 3075
+    },
+    {
+      "epoch": 0.31,
+      "grad_norm": 3.93858003616333,
+      "learning_rate": 2.8766666666666663e-05,
+      "loss": 0.1707,
+      "step": 3100
+    },
+    {
+      "epoch": 0.3125,
+      "grad_norm": 3.316304922103882,
+      "learning_rate": 2.8662499999999995e-05,
+      "loss": 0.1511,
+      "step": 3125
+    },
+    {
+      "epoch": 0.315,
+      "grad_norm": 4.505865097045898,
+      "learning_rate": 2.855833333333333e-05,
+      "loss": 0.1823,
+      "step": 3150
+    },
+    {
+      "epoch": 0.3175,
+      "grad_norm": 4.541491508483887,
+      "learning_rate": 2.8454166666666663e-05,
+      "loss": 0.2677,
+      "step": 3175
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 5.266469478607178,
+      "learning_rate": 2.8349999999999998e-05,
+      "loss": 0.2491,
+      "step": 3200
+    },
+    {
+      "epoch": 0.3225,
+      "grad_norm": 4.68573522567749,
+      "learning_rate": 2.824583333333333e-05,
+      "loss": 0.2651,
+      "step": 3225
+    },
+    {
+      "epoch": 0.325,
+      "grad_norm": 4.3486127853393555,
+      "learning_rate": 2.8141666666666665e-05,
+      "loss": 0.2176,
+      "step": 3250
+    },
+    {
+      "epoch": 0.3275,
+      "grad_norm": 5.374783515930176,
+      "learning_rate": 2.80375e-05,
+      "loss": 0.2237,
+      "step": 3275
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 3.7645037174224854,
+      "learning_rate": 2.7933333333333332e-05,
+      "loss": 0.2381,
+      "step": 3300
+    },
+    {
+      "epoch": 0.3325,
+      "grad_norm": 3.953054666519165,
+      "learning_rate": 2.7829166666666668e-05,
+      "loss": 0.189,
+      "step": 3325
+    },
+    {
+      "epoch": 0.335,
+      "grad_norm": 3.9426944255828857,
+      "learning_rate": 2.7724999999999996e-05,
+      "loss": 0.1514,
+      "step": 3350
+    },
+    {
+      "epoch": 0.3375,
+      "grad_norm": 3.1478254795074463,
+      "learning_rate": 2.7620833333333328e-05,
+      "loss": 0.1353,
+      "step": 3375
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 3.234048366546631,
+      "learning_rate": 2.7516666666666664e-05,
+      "loss": 0.1437,
+      "step": 3400
+    },
+    {
+      "epoch": 0.3425,
+      "grad_norm": 3.828132390975952,
+      "learning_rate": 2.7412499999999995e-05,
+      "loss": 0.1453,
+      "step": 3425
+    },
+    {
+      "epoch": 0.345,
+      "grad_norm": 2.6246049404144287,
+      "learning_rate": 2.730833333333333e-05,
+      "loss": 0.1369,
+      "step": 3450
+    },
+    {
+      "epoch": 0.3475,
+      "grad_norm": 3.3502352237701416,
+      "learning_rate": 2.7204166666666663e-05,
+      "loss": 0.132,
+      "step": 3475
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 4.489558696746826,
+      "learning_rate": 2.7099999999999998e-05,
+      "loss": 0.1607,
+      "step": 3500
+    },
+    {
+      "epoch": 0.3525,
+      "grad_norm": 3.3890199661254883,
+      "learning_rate": 2.6995833333333333e-05,
+      "loss": 0.1498,
+      "step": 3525
+    },
+    {
+      "epoch": 0.355,
+      "grad_norm": 5.810647487640381,
+      "learning_rate": 2.6891666666666665e-05,
+      "loss": 0.1479,
+      "step": 3550
+    },
+    {
+      "epoch": 0.3575,
+      "grad_norm": 5.223288059234619,
+      "learning_rate": 2.67875e-05,
+      "loss": 0.2281,
+      "step": 3575
+    },
+    {
+      "epoch": 0.36,
+      "grad_norm": 4.546659469604492,
+      "learning_rate": 2.6683333333333333e-05,
+      "loss": 0.2638,
+      "step": 3600
+    },
+    {
+      "epoch": 0.3625,
+      "grad_norm": 4.0937018394470215,
+      "learning_rate": 2.657916666666666e-05,
+      "loss": 0.2437,
+      "step": 3625
+    },
+    {
+      "epoch": 0.365,
+      "grad_norm": 2.648258686065674,
+      "learning_rate": 2.6474999999999996e-05,
+      "loss": 0.1537,
+      "step": 3650
+    },
+    {
+      "epoch": 0.3675,
+      "grad_norm": 3.579690456390381,
+      "learning_rate": 2.637083333333333e-05,
+      "loss": 0.1338,
+      "step": 3675
+    },
+    {
+      "epoch": 0.37,
+      "grad_norm": 2.849726438522339,
+      "learning_rate": 2.6266666666666664e-05,
+      "loss": 0.1334,
+      "step": 3700
+    },
+    {
+      "epoch": 0.3725,
+      "grad_norm": 3.449441432952881,
+      "learning_rate": 2.61625e-05,
+      "loss": 0.1603,
+      "step": 3725
+    },
+    {
+      "epoch": 0.375,
+      "grad_norm": 4.7894768714904785,
+      "learning_rate": 2.605833333333333e-05,
+      "loss": 0.2046,
+      "step": 3750
+    },
+    {
+      "epoch": 0.3775,
+      "grad_norm": 3.9832916259765625,
+      "learning_rate": 2.5954166666666666e-05,
+      "loss": 0.25,
+      "step": 3775
+    },
+    {
+      "epoch": 0.38,
+      "grad_norm": 4.628417015075684,
+      "learning_rate": 2.5849999999999998e-05,
+      "loss": 0.2515,
+      "step": 3800
+    },
+    {
+      "epoch": 0.3825,
+      "grad_norm": 5.086034297943115,
+      "learning_rate": 2.5745833333333333e-05,
+      "loss": 0.253,
+      "step": 3825
+    },
+    {
+      "epoch": 0.385,
+      "grad_norm": 5.298262119293213,
+      "learning_rate": 2.5641666666666665e-05,
+      "loss": 0.2664,
+      "step": 3850
+    },
+    {
+      "epoch": 0.3875,
+      "grad_norm": 4.012029647827148,
+      "learning_rate": 2.55375e-05,
+      "loss": 0.234,
+      "step": 3875
+    },
+    {
+      "epoch": 0.39,
+      "grad_norm": 4.897658824920654,
+      "learning_rate": 2.543333333333333e-05,
+      "loss": 0.1752,
+      "step": 3900
+    },
+    {
+      "epoch": 0.3925,
+      "grad_norm": 2.986084222793579,
+      "learning_rate": 2.5329166666666665e-05,
+      "loss": 0.1496,
+      "step": 3925
+    },
+    {
+      "epoch": 0.395,
+      "grad_norm": 3.444263219833374,
+      "learning_rate": 2.5224999999999997e-05,
+      "loss": 0.141,
+      "step": 3950
+    },
+    {
+      "epoch": 0.3975,
+      "grad_norm": 6.647165775299072,
+      "learning_rate": 2.5120833333333332e-05,
+      "loss": 0.2411,
+      "step": 3975
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 4.901034832000732,
+      "learning_rate": 2.5016666666666664e-05,
+      "loss": 0.3024,
+      "step": 4000
+    },
+    {
+      "epoch": 0.4,
+      "eval_loss": 0.3800531327724457,
+      "eval_runtime": 59.3618,
+      "eval_samples_per_second": 35.444,
+      "eval_steps_per_second": 2.224,
+      "eval_wer": 21.772459568103205,
+      "step": 4000
+    },
+    {
+      "epoch": 0.4025,
+      "grad_norm": 3.827197313308716,
+      "learning_rate": 2.49125e-05,
+      "loss": 0.2243,
+      "step": 4025
+    },
+    {
+      "epoch": 0.405,
+      "grad_norm": 5.707655429840088,
+      "learning_rate": 2.480833333333333e-05,
+      "loss": 0.3027,
+      "step": 4050
+    },
+    {
+      "epoch": 0.4075,
+      "grad_norm": 4.641834259033203,
+      "learning_rate": 2.4704166666666666e-05,
+      "loss": 0.2182,
+      "step": 4075
+    },
+    {
+      "epoch": 0.41,
+      "grad_norm": 3.259056329727173,
+      "learning_rate": 2.4599999999999998e-05,
+      "loss": 0.2125,
+      "step": 4100
+    },
+    {
+      "epoch": 0.4125,
+      "grad_norm": 2.5339012145996094,
+      "learning_rate": 2.4495833333333334e-05,
+      "loss": 0.1519,
+      "step": 4125
+    },
+    {
+      "epoch": 0.415,
+      "grad_norm": 3.520731210708618,
+      "learning_rate": 2.4391666666666666e-05,
+      "loss": 0.1316,
+      "step": 4150
+    },
+    {
+      "epoch": 0.4175,
+      "grad_norm": 3.302873373031616,
+      "learning_rate": 2.4287499999999997e-05,
+      "loss": 0.1396,
+      "step": 4175
+    },
+    {
+      "epoch": 0.42,
+      "grad_norm": 3.5453193187713623,
+      "learning_rate": 2.418333333333333e-05,
+      "loss": 0.132,
+      "step": 4200
+    },
+    {
+      "epoch": 0.4225,
+      "grad_norm": 2.582653522491455,
+      "learning_rate": 2.4079166666666665e-05,
+      "loss": 0.1394,
+      "step": 4225
+    },
+    {
+      "epoch": 0.425,
+      "grad_norm": 3.1388375759124756,
+      "learning_rate": 2.3974999999999997e-05,
+      "loss": 0.1351,
+      "step": 4250
+    },
+    {
+      "epoch": 0.4275,
+      "grad_norm": 3.6783761978149414,
+      "learning_rate": 2.3870833333333332e-05,
+      "loss": 0.1508,
+      "step": 4275
+    },
+    {
+      "epoch": 0.43,
+      "grad_norm": 4.66838264465332,
+      "learning_rate": 2.3766666666666664e-05,
+      "loss": 0.2209,
+      "step": 4300
+    },
+    {
+      "epoch": 0.4325,
+      "grad_norm": 4.905246734619141,
+      "learning_rate": 2.36625e-05,
+      "loss": 0.1991,
+      "step": 4325
+    },
+    {
+      "epoch": 0.435,
+      "grad_norm": 4.691884517669678,
+      "learning_rate": 2.355833333333333e-05,
+      "loss": 0.2077,
+      "step": 4350
+    },
+    {
+      "epoch": 0.4375,
+      "grad_norm": 2.6007065773010254,
+      "learning_rate": 2.3454166666666666e-05,
+      "loss": 0.1266,
+      "step": 4375
+    },
+    {
+      "epoch": 0.44,
+      "grad_norm": 9.824126243591309,
+      "learning_rate": 2.335e-05,
+      "loss": 0.1125,
+      "step": 4400
+    },
+    {
+      "epoch": 0.4425,
+      "grad_norm": 3.417362689971924,
+      "learning_rate": 2.3245833333333334e-05,
+      "loss": 0.125,
+      "step": 4425
+    },
+    {
+      "epoch": 0.445,
+      "grad_norm": 3.469744920730591,
+      "learning_rate": 2.3141666666666666e-05,
+      "loss": 0.1466,
+      "step": 4450
+    },
+    {
+      "epoch": 0.4475,
+      "grad_norm": 3.0977509021759033,
+      "learning_rate": 2.3037499999999998e-05,
+      "loss": 0.149,
+      "step": 4475
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 3.3727025985717773,
+      "learning_rate": 2.293333333333333e-05,
+      "loss": 0.1388,
+      "step": 4500
+    },
+    {
+      "epoch": 0.4525,
+      "grad_norm": 4.079521656036377,
+      "learning_rate": 2.2829166666666665e-05,
+      "loss": 0.1807,
+      "step": 4525
+    },
+    {
+      "epoch": 0.455,
+      "grad_norm": 4.479795455932617,
+      "learning_rate": 2.2724999999999997e-05,
+      "loss": 0.2362,
+      "step": 4550
+    },
+    {
+      "epoch": 0.4575,
+      "grad_norm": 5.127561092376709,
+      "learning_rate": 2.2620833333333332e-05,
+      "loss": 0.2232,
+      "step": 4575
+    },
+    {
+      "epoch": 0.46,
+      "grad_norm": 4.606805801391602,
+      "learning_rate": 2.2516666666666664e-05,
+      "loss": 0.2392,
+      "step": 4600
+    },
+    {
+      "epoch": 0.4625,
+      "grad_norm": 4.652634620666504,
+      "learning_rate": 2.24125e-05,
+      "loss": 0.2442,
+      "step": 4625
+    },
+    {
+      "epoch": 0.465,
+      "grad_norm": 5.170529365539551,
+      "learning_rate": 2.230833333333333e-05,
+      "loss": 0.2364,
+      "step": 4650
+    },
+    {
+      "epoch": 0.4675,
+      "grad_norm": 4.186745643615723,
+      "learning_rate": 2.2204166666666667e-05,
+      "loss": 0.204,
+      "step": 4675
+    },
+    {
+      "epoch": 0.47,
+      "grad_norm": 3.336618423461914,
+      "learning_rate": 2.21e-05,
+      "loss": 0.1811,
+      "step": 4700
+    },
+    {
+      "epoch": 0.4725,
+      "grad_norm": 4.125630855560303,
+      "learning_rate": 2.1995833333333334e-05,
+      "loss": 0.1953,
+      "step": 4725
+    },
+    {
+      "epoch": 0.475,
+      "grad_norm": 3.7965681552886963,
+      "learning_rate": 2.1891666666666662e-05,
+      "loss": 0.1929,
+      "step": 4750
+    },
+    {
+      "epoch": 0.4775,
+      "grad_norm": 3.5311403274536133,
+      "learning_rate": 2.1787499999999998e-05,
+      "loss": 0.1436,
+      "step": 4775
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 3.0808041095733643,
+      "learning_rate": 2.168333333333333e-05,
+      "loss": 0.1581,
+      "step": 4800
+    },
+    {
+      "epoch": 0.4825,
+      "grad_norm": 3.254953384399414,
+      "learning_rate": 2.1579166666666665e-05,
+      "loss": 0.1331,
+      "step": 4825
+    },
+    {
+      "epoch": 0.485,
+      "grad_norm": 3.7512283325195312,
+      "learning_rate": 2.1474999999999997e-05,
+      "loss": 0.1534,
+      "step": 4850
+    },
+    {
+      "epoch": 0.4875,
+      "grad_norm": 4.095212936401367,
+      "learning_rate": 2.1370833333333332e-05,
+      "loss": 0.1611,
+      "step": 4875
+    },
+    {
+      "epoch": 0.49,
+      "grad_norm": 4.149164199829102,
+      "learning_rate": 2.1266666666666664e-05,
+      "loss": 0.1834,
+      "step": 4900
+    },
+    {
+      "epoch": 0.4925,
+      "grad_norm": 2.9498376846313477,
+      "learning_rate": 2.11625e-05,
+      "loss": 0.1653,
+      "step": 4925
+    },
+    {
+      "epoch": 0.495,
+      "grad_norm": 3.2201554775238037,
+      "learning_rate": 2.105833333333333e-05,
+      "loss": 0.1379,
+      "step": 4950
+    },
+    {
+      "epoch": 0.4975,
+      "grad_norm": 3.2108354568481445,
+      "learning_rate": 2.0954166666666667e-05,
+      "loss": 0.1281,
+      "step": 4975
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 3.786989212036133,
+      "learning_rate": 2.085e-05,
+      "loss": 0.1309,
+      "step": 5000
+    },
+    {
+      "epoch": 0.5,
+      "eval_loss": 0.37590494751930237,
+      "eval_runtime": 59.5468,
+      "eval_samples_per_second": 35.334,
+      "eval_steps_per_second": 2.217,
+      "eval_wer": 18.22941011498551,
+      "step": 5000
+    },
+    {
+      "epoch": 0.5025,
+      "grad_norm": 3.3882791996002197,
+      "learning_rate": 2.074583333333333e-05,
+      "loss": 0.1873,
+      "step": 5025
+    },
+    {
+      "epoch": 0.505,
+      "grad_norm": 3.8903958797454834,
+      "learning_rate": 2.0641666666666662e-05,
+      "loss": 0.1647,
+      "step": 5050
+    },
+    {
+      "epoch": 0.5075,
+      "grad_norm": 4.302113056182861,
+      "learning_rate": 2.0537499999999998e-05,
+      "loss": 0.2523,
+      "step": 5075
+    },
+    {
+      "epoch": 0.51,
+      "grad_norm": 4.574698448181152,
+      "learning_rate": 2.043333333333333e-05,
+      "loss": 0.2285,
+      "step": 5100
+    },
+    {
+      "epoch": 0.5125,
+      "grad_norm": 4.252900123596191,
+      "learning_rate": 2.0329166666666665e-05,
+      "loss": 0.1976,
+      "step": 5125
+    },
+    {
+      "epoch": 0.515,
+      "grad_norm": 5.135857582092285,
+      "learning_rate": 2.0224999999999997e-05,
+      "loss": 0.2034,
+      "step": 5150
+    },
+    {
+      "epoch": 0.5175,
+      "grad_norm": 2.7495837211608887,
+      "learning_rate": 2.0120833333333332e-05,
+      "loss": 0.1707,
+      "step": 5175
+    },
+    {
+      "epoch": 0.52,
+      "grad_norm": 2.8536860942840576,
+      "learning_rate": 2.0016666666666664e-05,
+      "loss": 0.1165,
+      "step": 5200
+    },
+    {
+      "epoch": 0.5225,
+      "grad_norm": 2.8752596378326416,
+      "learning_rate": 1.99125e-05,
+      "loss": 0.1224,
+      "step": 5225
+    },
+    {
+      "epoch": 0.525,
+      "grad_norm": 2.7139313220977783,
+      "learning_rate": 1.980833333333333e-05,
+      "loss": 0.1221,
+      "step": 5250
+    },
+    {
+      "epoch": 0.5275,
+      "grad_norm": 2.398601531982422,
+      "learning_rate": 1.9704166666666667e-05,
+      "loss": 0.123,
+      "step": 5275
+    },
+    {
+      "epoch": 0.53,
+      "grad_norm": 3.568558931350708,
+      "learning_rate": 1.9599999999999995e-05,
+      "loss": 0.1287,
+      "step": 5300
+    },
+    {
+      "epoch": 0.5325,
+      "grad_norm": 3.4306693077087402,
+      "learning_rate": 1.949583333333333e-05,
+      "loss": 0.1282,
+      "step": 5325
+    },
+    {
+      "epoch": 0.535,
+      "grad_norm": 4.469999313354492,
+      "learning_rate": 1.9391666666666663e-05,
+      "loss": 0.1709,
+      "step": 5350
+    },
+    {
+      "epoch": 0.5375,
+      "grad_norm": 4.155087471008301,
+      "learning_rate": 1.9287499999999998e-05,
+      "loss": 0.202,
+      "step": 5375
+    },
+    {
+      "epoch": 0.54,
+      "grad_norm": 3.4707844257354736,
+      "learning_rate": 1.918333333333333e-05,
+      "loss": 0.1904,
+      "step": 5400
+    },
+    {
+      "epoch": 0.5425,
+      "grad_norm": 4.137521266937256,
+      "learning_rate": 1.9079166666666665e-05,
+      "loss": 0.184,
+      "step": 5425
+    },
+    {
+      "epoch": 0.545,
+      "grad_norm": 3.6141257286071777,
+      "learning_rate": 1.8974999999999997e-05,
+      "loss": 0.1629,
+      "step": 5450
+    },
+    {
+      "epoch": 0.5475,
+      "grad_norm": 3.5112478733062744,
+      "learning_rate": 1.8870833333333332e-05,
+      "loss": 0.1358,
+      "step": 5475
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 3.2462754249572754,
+      "learning_rate": 1.8766666666666664e-05,
+      "loss": 0.1379,
+      "step": 5500
+    },
+    {
+      "epoch": 0.5525,
+      "grad_norm": 5.152227878570557,
+      "learning_rate": 1.8662499999999996e-05,
+      "loss": 0.1776,
+      "step": 5525
+    },
+    {
+      "epoch": 0.555,
+      "grad_norm": 4.612731456756592,
+      "learning_rate": 1.855833333333333e-05,
+      "loss": 0.2047,
+      "step": 5550
+    },
+    {
+      "epoch": 0.5575,
+      "grad_norm": 4.244606018066406,
+      "learning_rate": 1.8454166666666663e-05,
+      "loss": 0.1606,
+      "step": 5575
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 2.2419683933258057,
+      "learning_rate": 1.835e-05,
+      "loss": 0.1089,
+      "step": 5600
+    },
+    {
+      "epoch": 0.5625,
+      "grad_norm": 3.0442473888397217,
+      "learning_rate": 1.8245833333333334e-05,
+      "loss": 0.0874,
+      "step": 5625
+    },
+    {
+      "epoch": 0.565,
+      "grad_norm": 2.6160330772399902,
+      "learning_rate": 1.8141666666666663e-05,
+      "loss": 0.0896,
+      "step": 5650
+    },
+    {
+      "epoch": 0.5675,
+      "grad_norm": 2.5410189628601074,
+      "learning_rate": 1.8037499999999998e-05,
+      "loss": 0.1159,
+      "step": 5675
+    },
+    {
+      "epoch": 0.57,
+      "grad_norm": 3.572497606277466,
+      "learning_rate": 1.793333333333333e-05,
+      "loss": 0.1245,
+      "step": 5700
+    },
+    {
+      "epoch": 0.5725,
+      "grad_norm": 2.8839869499206543,
+      "learning_rate": 1.7829166666666665e-05,
+      "loss": 0.1236,
+      "step": 5725
+    },
+    {
+      "epoch": 0.575,
+      "grad_norm": 2.6902310848236084,
+      "learning_rate": 1.7725e-05,
+      "loss": 0.1165,
+      "step": 5750
+    },
+    {
+      "epoch": 0.5775,
+      "grad_norm": 2.7092816829681396,
+      "learning_rate": 1.7620833333333332e-05,
+      "loss": 0.1164,
+      "step": 5775
+    },
+    {
+      "epoch": 0.58,
+      "grad_norm": 2.8278088569641113,
+      "learning_rate": 1.7516666666666664e-05,
+      "loss": 0.1098,
+      "step": 5800
+    },
+    {
+      "epoch": 0.5825,
+      "grad_norm": 2.267011880874634,
+      "learning_rate": 1.7412499999999996e-05,
+      "loss": 0.096,
+      "step": 5825
+    },
+    {
+      "epoch": 0.585,
+      "grad_norm": 3.5820508003234863,
+      "learning_rate": 1.730833333333333e-05,
+      "loss": 0.1046,
+      "step": 5850
+    },
+    {
+      "epoch": 0.5875,
+      "grad_norm": 3.4889466762542725,
+      "learning_rate": 1.7204166666666667e-05,
+      "loss": 0.1114,
+      "step": 5875
+    },
+    {
+      "epoch": 0.59,
+      "grad_norm": 3.823050022125244,
+      "learning_rate": 1.71e-05,
+      "loss": 0.1408,
+      "step": 5900
+    },
+    {
+      "epoch": 0.5925,
+      "grad_norm": 6.086155414581299,
+      "learning_rate": 1.6995833333333334e-05,
+      "loss": 0.2024,
+      "step": 5925
+    },
+    {
+      "epoch": 0.595,
+      "grad_norm": 3.8740174770355225,
+      "learning_rate": 1.6891666666666663e-05,
+      "loss": 0.1934,
+      "step": 5950
+    },
+    {
+      "epoch": 0.5975,
+      "grad_norm": 4.5182881355285645,
+      "learning_rate": 1.6787499999999998e-05,
+      "loss": 0.1699,
+      "step": 5975
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 3.7333216667175293,
+      "learning_rate": 1.6683333333333333e-05,
+      "loss": 0.1582,
+      "step": 6000
+    },
+    {
+      "epoch": 0.6,
+      "eval_loss": 0.36108532547950745,
+      "eval_runtime": 60.9789,
+      "eval_samples_per_second": 34.504,
+      "eval_steps_per_second": 2.165,
+      "eval_wer": 17.977002898008788,
+      "step": 6000
+    },
+    {
+      "epoch": 0.6025,
+      "grad_norm": 3.5465595722198486,
+      "learning_rate": 1.6579166666666665e-05,
+      "loss": 0.1487,
+      "step": 6025
+    },
+    {
+      "epoch": 0.605,
+      "grad_norm": 3.182589530944824,
+      "learning_rate": 1.6475e-05,
+      "loss": 0.1345,
+      "step": 6050
+    },
+    {
+      "epoch": 0.6075,
+      "grad_norm": 2.878831148147583,
+      "learning_rate": 1.6370833333333333e-05,
+      "loss": 0.1197,
+      "step": 6075
+    },
+    {
+      "epoch": 0.61,
+      "grad_norm": 2.519314765930176,
+      "learning_rate": 1.6266666666666665e-05,
+      "loss": 0.1115,
+      "step": 6100
+    },
+    {
+      "epoch": 0.6125,
+      "grad_norm": 3.164379596710205,
+      "learning_rate": 1.61625e-05,
+      "loss": 0.1144,
+      "step": 6125
+    },
+    {
+      "epoch": 0.615,
+      "grad_norm": 3.2751474380493164,
+      "learning_rate": 1.6058333333333332e-05,
+      "loss": 0.1295,
+      "step": 6150
+    },
+    {
+      "epoch": 0.6175,
+      "grad_norm": 3.138570785522461,
+      "learning_rate": 1.5954166666666667e-05,
+      "loss": 0.128,
+      "step": 6175
+    },
+    {
+      "epoch": 0.62,
+      "grad_norm": 4.330804347991943,
+      "learning_rate": 1.585e-05,
+      "loss": 0.1287,
+      "step": 6200
+    },
+    {
+      "epoch": 0.6225,
+      "grad_norm": 3.6639904975891113,
+      "learning_rate": 1.574583333333333e-05,
+      "loss": 0.1225,
+      "step": 6225
+    },
+    {
+      "epoch": 0.625,
+      "grad_norm": 3.540968179702759,
+      "learning_rate": 1.5641666666666666e-05,
+      "loss": 0.1148,
+      "step": 6250
+    },
+    {
+      "epoch": 0.6275,
+      "grad_norm": 3.4403693675994873,
+      "learning_rate": 1.5537499999999998e-05,
+      "loss": 0.1325,
+      "step": 6275
+    },
+    {
+      "epoch": 0.63,
+      "grad_norm": 3.9871201515197754,
+      "learning_rate": 1.5433333333333334e-05,
+      "loss": 0.1443,
+      "step": 6300
+    },
+    {
+      "epoch": 0.6325,
+      "grad_norm": 3.233175039291382,
+      "learning_rate": 1.5329166666666665e-05,
+      "loss": 0.1955,
+      "step": 6325
+    },
+    {
+      "epoch": 0.635,
+      "grad_norm": 3.86395525932312,
+      "learning_rate": 1.5224999999999999e-05,
+      "loss": 0.1768,
+      "step": 6350
+    },
+    {
+      "epoch": 0.6375,
+      "grad_norm": 3.179206609725952,
+      "learning_rate": 1.5120833333333331e-05,
+      "loss": 0.1644,
+      "step": 6375
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 2.821089267730713,
+      "learning_rate": 1.5016666666666665e-05,
+      "loss": 0.1247,
+      "step": 6400
+    },
+    {
+      "epoch": 0.6425,
+      "grad_norm": 2.4977850914001465,
+      "learning_rate": 1.4912499999999998e-05,
+      "loss": 0.1133,
+      "step": 6425
+    },
+    {
+      "epoch": 0.645,
+      "grad_norm": 4.838348388671875,
+      "learning_rate": 1.4808333333333332e-05,
+      "loss": 0.1097,
+      "step": 6450
+    },
+    {
+      "epoch": 0.6475,
+      "grad_norm": 3.5508921146392822,
+      "learning_rate": 1.4704166666666666e-05,
+      "loss": 0.1553,
+      "step": 6475
+    },
+    {
+      "epoch": 0.65,
+      "grad_norm": 3.1777148246765137,
+      "learning_rate": 1.4599999999999997e-05,
+      "loss": 0.1492,
+      "step": 6500
+    },
+    {
+      "epoch": 0.6525,
+      "grad_norm": 5.064118385314941,
+      "learning_rate": 1.4495833333333331e-05,
+      "loss": 0.1921,
+      "step": 6525
+    },
+    {
+      "epoch": 0.655,
+      "grad_norm": 3.8920788764953613,
+      "learning_rate": 1.4391666666666665e-05,
+      "loss": 0.1769,
+      "step": 6550
+    },
+    {
+      "epoch": 0.6575,
+      "grad_norm": 3.3840203285217285,
+      "learning_rate": 1.4287499999999998e-05,
+      "loss": 0.1772,
+      "step": 6575
+    },
+    {
+      "epoch": 0.66,
+      "grad_norm": 3.4659571647644043,
+      "learning_rate": 1.4183333333333332e-05,
+      "loss": 0.155,
+      "step": 6600
+    },
+    {
+      "epoch": 0.6625,
+      "grad_norm": 3.893953561782837,
+      "learning_rate": 1.4079166666666666e-05,
+      "loss": 0.1666,
+      "step": 6625
+    },
+    {
+      "epoch": 0.665,
+      "grad_norm": 3.3748056888580322,
+      "learning_rate": 1.3974999999999997e-05,
+      "loss": 0.1967,
+      "step": 6650
+    },
+    {
+      "epoch": 0.6675,
+      "grad_norm": 3.469231605529785,
+      "learning_rate": 1.3870833333333331e-05,
+      "loss": 0.1671,
+      "step": 6675
+    },
+    {
+      "epoch": 0.67,
+      "grad_norm": 2.9588847160339355,
+      "learning_rate": 1.3766666666666665e-05,
+      "loss": 0.1613,
+      "step": 6700
+    },
+    {
+      "epoch": 0.6725,
+      "grad_norm": 3.3315980434417725,
+      "learning_rate": 1.3662499999999998e-05,
+      "loss": 0.1157,
+      "step": 6725
+    },
+    {
+      "epoch": 0.675,
+      "grad_norm": 4.094941139221191,
+      "learning_rate": 1.3558333333333334e-05,
+      "loss": 0.105,
+      "step": 6750
+    },
+    {
+      "epoch": 0.6775,
+      "grad_norm": 3.8783822059631348,
+      "learning_rate": 1.3454166666666664e-05,
+      "loss": 0.1277,
+      "step": 6775
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 4.027767181396484,
+      "learning_rate": 1.3349999999999998e-05,
+      "loss": 0.2186,
+      "step": 6800
+    },
+    {
+      "epoch": 0.6825,
+      "grad_norm": 6.057985305786133,
+      "learning_rate": 1.3245833333333331e-05,
+      "loss": 0.1943,
+      "step": 6825
+    },
+    {
+      "epoch": 0.685,
+      "grad_norm": 5.209383964538574,
+      "learning_rate": 1.3141666666666665e-05,
+      "loss": 0.2008,
+      "step": 6850
+    },
+    {
+      "epoch": 0.6875,
+      "grad_norm": 4.257080554962158,
+      "learning_rate": 1.30375e-05,
+      "loss": 0.1915,
+      "step": 6875
+    },
+    {
+      "epoch": 0.69,
+      "grad_norm": 4.305874347686768,
+      "learning_rate": 1.2933333333333334e-05,
+      "loss": 0.1878,
+      "step": 6900
+    },
+    {
+      "epoch": 0.6925,
+      "grad_norm": 3.4165127277374268,
+      "learning_rate": 1.2829166666666664e-05,
+      "loss": 0.172,
+      "step": 6925
+    },
+    {
+      "epoch": 0.695,
+      "grad_norm": 4.07374382019043,
+      "learning_rate": 1.2724999999999998e-05,
+      "loss": 0.1759,
+      "step": 6950
+    },
+    {
+      "epoch": 0.6975,
+      "grad_norm": 4.26957368850708,
+      "learning_rate": 1.2620833333333333e-05,
+      "loss": 0.1943,
+      "step": 6975
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 4.227268218994141,
+      "learning_rate": 1.2516666666666667e-05,
+      "loss": 0.1848,
+      "step": 7000
+    },
+    {
+      "epoch": 0.7,
+      "eval_loss": 0.35591790080070496,
+      "eval_runtime": 58.3501,
+      "eval_samples_per_second": 36.058,
+      "eval_steps_per_second": 2.262,
+      "eval_wer": 16.677573151350845,
+      "step": 7000
+    },
+    {
+      "epoch": 0.7025,
+      "grad_norm": 7.228043079376221,
+      "learning_rate": 1.2416666666666666e-05,
+      "loss": 0.3395,
+      "step": 7025
+    },
+    {
+      "epoch": 0.705,
+      "grad_norm": 8.51174259185791,
+      "learning_rate": 1.2312499999999998e-05,
+      "loss": 0.5988,
+      "step": 7050
+    },
+    {
+      "epoch": 0.7075,
+      "grad_norm": 5.47458553314209,
+      "learning_rate": 1.2208333333333331e-05,
+      "loss": 0.5235,
+      "step": 7075
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 4.297708511352539,
+      "learning_rate": 1.2104166666666665e-05,
+      "loss": 0.4329,
+      "step": 7100
+    },
+    {
+      "epoch": 0.7125,
+      "grad_norm": 4.446761608123779,
+      "learning_rate": 1.1999999999999999e-05,
+      "loss": 0.238,
+      "step": 7125
+    },
+    {
+      "epoch": 0.715,
+      "grad_norm": 4.935601711273193,
+      "learning_rate": 1.1895833333333332e-05,
+      "loss": 0.1848,
+      "step": 7150
+    },
+    {
+      "epoch": 0.7175,
+      "grad_norm": 3.1769227981567383,
+      "learning_rate": 1.1791666666666666e-05,
+      "loss": 0.1695,
+      "step": 7175
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 3.629225492477417,
+      "learning_rate": 1.1687499999999998e-05,
+      "loss": 0.1313,
+      "step": 7200
+    },
+    {
+      "epoch": 0.7225,
+      "grad_norm": 2.838541269302368,
+      "learning_rate": 1.1583333333333331e-05,
+      "loss": 0.1274,
+      "step": 7225
+    },
+    {
+      "epoch": 0.725,
+      "grad_norm": 2.8158419132232666,
+      "learning_rate": 1.1479166666666665e-05,
+      "loss": 0.1372,
+      "step": 7250
+    },
+    {
+      "epoch": 0.7275,
+      "grad_norm": 2.6257667541503906,
+      "learning_rate": 1.1374999999999999e-05,
+      "loss": 0.1132,
+      "step": 7275
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 2.1429176330566406,
+      "learning_rate": 1.1270833333333332e-05,
+      "loss": 0.0966,
+      "step": 7300
+    },
+    {
+      "epoch": 0.7325,
+      "grad_norm": 2.7238330841064453,
+      "learning_rate": 1.1166666666666664e-05,
+      "loss": 0.1008,
+      "step": 7325
+    },
+    {
+      "epoch": 0.735,
+      "grad_norm": 4.3870344161987305,
+      "learning_rate": 1.1062499999999998e-05,
+      "loss": 0.1323,
+      "step": 7350
+    },
+    {
+      "epoch": 0.7375,
+      "grad_norm": 3.9880590438842773,
+      "learning_rate": 1.0958333333333331e-05,
+      "loss": 0.1818,
+      "step": 7375
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 5.648591995239258,
+      "learning_rate": 1.0854166666666665e-05,
+      "loss": 0.1446,
+      "step": 7400
+    },
+    {
+      "epoch": 0.7425,
+      "grad_norm": 2.984696388244629,
+      "learning_rate": 1.075e-05,
+      "loss": 0.1677,
+      "step": 7425
+    },
+    {
+      "epoch": 0.745,
+      "grad_norm": 3.2804365158081055,
+      "learning_rate": 1.0645833333333334e-05,
+      "loss": 0.1169,
+      "step": 7450
+    },
+    {
+      "epoch": 0.7475,
+      "grad_norm": 2.6319210529327393,
+      "learning_rate": 1.0541666666666664e-05,
+      "loss": 0.1076,
+      "step": 7475
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 3.784388780593872,
+      "learning_rate": 1.0437499999999998e-05,
+      "loss": 0.1044,
+      "step": 7500
+    },
+    {
+      "epoch": 0.7525,
+      "grad_norm": 4.933755397796631,
+      "learning_rate": 1.0333333333333332e-05,
+      "loss": 0.2042,
+      "step": 7525
+    },
+    {
+      "epoch": 0.755,
+      "grad_norm": 5.344150543212891,
+      "learning_rate": 1.0229166666666667e-05,
+      "loss": 0.2641,
+      "step": 7550
+    },
+    {
+      "epoch": 0.7575,
+      "grad_norm": 5.08501672744751,
+      "learning_rate": 1.0125e-05,
+      "loss": 0.293,
+      "step": 7575
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 2.6458065509796143,
+      "learning_rate": 1.0020833333333334e-05,
+      "loss": 0.1825,
+      "step": 7600
+    },
+    {
+      "epoch": 0.7625,
+      "grad_norm": 4.469775199890137,
+      "learning_rate": 9.916666666666664e-06,
+      "loss": 0.1311,
+      "step": 7625
+    },
+    {
+      "epoch": 0.765,
+      "grad_norm": 2.5928456783294678,
+      "learning_rate": 9.8125e-06,
+      "loss": 0.1084,
+      "step": 7650
+    },
+    {
+      "epoch": 0.7675,
+      "grad_norm": 4.348841190338135,
+      "learning_rate": 9.708333333333333e-06,
+      "loss": 0.1447,
+      "step": 7675
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 4.078396797180176,
+      "learning_rate": 9.604166666666667e-06,
+      "loss": 0.1727,
+      "step": 7700
+    },
+    {
+      "epoch": 0.7725,
+      "grad_norm": 3.949575185775757,
+      "learning_rate": 9.5e-06,
+      "loss": 0.1824,
+      "step": 7725
+    },
+    {
+      "epoch": 0.775,
+      "grad_norm": 3.2387499809265137,
+      "learning_rate": 9.39583333333333e-06,
+      "loss": 0.1671,
+      "step": 7750
+    },
+    {
+      "epoch": 0.7775,
+      "grad_norm": 3.390455722808838,
+      "learning_rate": 9.291666666666666e-06,
+      "loss": 0.1201,
+      "step": 7775
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 4.596331596374512,
+      "learning_rate": 9.1875e-06,
+      "loss": 0.1413,
+      "step": 7800
+    },
+    {
+      "epoch": 0.7825,
+      "grad_norm": 3.470177412033081,
+      "learning_rate": 9.083333333333333e-06,
+      "loss": 0.3118,
+      "step": 7825
+    },
+    {
+      "epoch": 0.785,
+      "grad_norm": 2.657711982727051,
+      "learning_rate": 8.979166666666665e-06,
+      "loss": 0.1523,
+      "step": 7850
+    },
+    {
+      "epoch": 0.7875,
+      "grad_norm": 2.3193860054016113,
+      "learning_rate": 8.874999999999999e-06,
+      "loss": 0.1134,
+      "step": 7875
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 4.3279008865356445,
+      "learning_rate": 8.770833333333333e-06,
+      "loss": 0.1089,
+      "step": 7900
+    },
+    {
+      "epoch": 0.7925,
+      "grad_norm": 2.435753107070923,
+      "learning_rate": 8.666666666666666e-06,
+      "loss": 0.1112,
+      "step": 7925
+    },
+    {
+      "epoch": 0.795,
+      "grad_norm": 2.8667356967926025,
+      "learning_rate": 8.5625e-06,
+      "loss": 0.1072,
+      "step": 7950
+    },
+    {
+      "epoch": 0.7975,
+      "grad_norm": 2.846033811569214,
+      "learning_rate": 8.458333333333333e-06,
+      "loss": 0.1151,
+      "step": 7975
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 3.3730928897857666,
+      "learning_rate": 8.354166666666665e-06,
+      "loss": 0.1434,
+      "step": 8000
+    },
+    {
+      "epoch": 0.8,
+      "eval_loss": 0.300187349319458,
+      "eval_runtime": 58.5676,
+      "eval_samples_per_second": 35.924,
+      "eval_steps_per_second": 2.254,
+      "eval_wer": 14.985509956062447,
+      "step": 8000
+    },
+    {
+      "epoch": 0.8025,
+      "grad_norm": 4.13847017288208,
+      "learning_rate": 8.249999999999999e-06,
+      "loss": 0.1658,
+      "step": 8025
+    },
+    {
+      "epoch": 0.805,
+      "grad_norm": 3.6509456634521484,
+      "learning_rate": 8.145833333333333e-06,
+      "loss": 0.1818,
+      "step": 8050
+    },
+    {
+      "epoch": 0.8075,
+      "grad_norm": 2.960130453109741,
+      "learning_rate": 8.041666666666666e-06,
+      "loss": 0.135,
+      "step": 8075
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 2.997581720352173,
+      "learning_rate": 7.9375e-06,
+      "loss": 0.1213,
+      "step": 8100
+    },
+    {
+      "epoch": 0.8125,
+      "grad_norm": 2.4850475788116455,
+      "learning_rate": 7.833333333333333e-06,
+      "loss": 0.1052,
+      "step": 8125
+    },
+    {
+      "epoch": 0.815,
+      "grad_norm": 3.6655027866363525,
+      "learning_rate": 7.729166666666665e-06,
+      "loss": 0.1054,
+      "step": 8150
+    },
+    {
+      "epoch": 0.8175,
+      "grad_norm": 2.144341468811035,
+      "learning_rate": 7.625e-06,
+      "loss": 0.0934,
+      "step": 8175
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 3.284132242202759,
+      "learning_rate": 7.520833333333332e-06,
+      "loss": 0.0762,
+      "step": 8200
+    },
+    {
+      "epoch": 0.8225,
+      "grad_norm": 2.597266912460327,
+      "learning_rate": 7.416666666666666e-06,
+      "loss": 0.0982,
+      "step": 8225
+    },
+    {
+      "epoch": 0.825,
+      "grad_norm": 3.4222335815429688,
+      "learning_rate": 7.3125e-06,
+      "loss": 0.0851,
+      "step": 8250
+    },
+    {
+      "epoch": 0.8275,
+      "grad_norm": 2.741823434829712,
+      "learning_rate": 7.208333333333333e-06,
+      "loss": 0.092,
+      "step": 8275
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 2.2585489749908447,
+      "learning_rate": 7.104166666666666e-06,
+      "loss": 0.0948,
+      "step": 8300
+    },
+    {
+      "epoch": 0.8325,
+      "grad_norm": 3.675316572189331,
+      "learning_rate": 7e-06,
+      "loss": 0.1375,
+      "step": 8325
+    },
+    {
+      "epoch": 0.835,
+      "grad_norm": 4.135271072387695,
+      "learning_rate": 6.895833333333333e-06,
+      "loss": 0.1713,
+      "step": 8350
+    },
+    {
+      "epoch": 0.8375,
+      "grad_norm": 4.943787097930908,
+      "learning_rate": 6.791666666666666e-06,
+      "loss": 0.2061,
+      "step": 8375
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 2.7876579761505127,
+      "learning_rate": 6.6875e-06,
+      "loss": 0.1586,
+      "step": 8400
+    },
+    {
+      "epoch": 0.8425,
+      "grad_norm": 3.986652135848999,
+      "learning_rate": 6.583333333333333e-06,
+      "loss": 0.1148,
+      "step": 8425
+    },
+    {
+      "epoch": 0.845,
+      "grad_norm": 2.5285580158233643,
+      "learning_rate": 6.479166666666666e-06,
+      "loss": 0.1098,
+      "step": 8450
+    },
+    {
+      "epoch": 0.8475,
+      "grad_norm": 3.109323263168335,
+      "learning_rate": 6.375e-06,
+      "loss": 0.1222,
+      "step": 8475
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 3.02284574508667,
+      "learning_rate": 6.270833333333333e-06,
+      "loss": 0.1371,
+      "step": 8500
+    },
+    {
+      "epoch": 0.8525,
+      "grad_norm": 3.8272876739501953,
+      "learning_rate": 6.166666666666666e-06,
+      "loss": 0.1406,
+      "step": 8525
+    },
+    {
+      "epoch": 0.855,
+      "grad_norm": 5.428800106048584,
+      "learning_rate": 6.062499999999999e-06,
+      "loss": 0.1932,
+      "step": 8550
+    },
+    {
+      "epoch": 0.8575,
+      "grad_norm": 5.56389045715332,
+      "learning_rate": 5.958333333333333e-06,
+      "loss": 0.3211,
+      "step": 8575
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 7.038808822631836,
+      "learning_rate": 5.854166666666666e-06,
+      "loss": 0.4388,
+      "step": 8600
+    },
+    {
+      "epoch": 0.8625,
+      "grad_norm": 7.0381245613098145,
+      "learning_rate": 5.749999999999999e-06,
+      "loss": 0.515,
+      "step": 8625
+    },
+    {
+      "epoch": 0.865,
+      "grad_norm": 3.0722436904907227,
+      "learning_rate": 5.645833333333333e-06,
+      "loss": 0.3093,
+      "step": 8650
+    },
+    {
+      "epoch": 0.8675,
+      "grad_norm": 4.79665470123291,
+      "learning_rate": 5.541666666666666e-06,
+      "loss": 0.2125,
+      "step": 8675
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 5.695593357086182,
+      "learning_rate": 5.437499999999999e-06,
+      "loss": 0.2446,
+      "step": 8700
+    },
+    {
+      "epoch": 0.8725,
+      "grad_norm": 3.4895339012145996,
+      "learning_rate": 5.333333333333333e-06,
+      "loss": 0.1659,
+      "step": 8725
+    },
+    {
+      "epoch": 0.875,
+      "grad_norm": 2.6607508659362793,
+      "learning_rate": 5.2291666666666664e-06,
+      "loss": 0.1084,
+      "step": 8750
+    },
+    {
+      "epoch": 0.8775,
+      "grad_norm": 2.7779884338378906,
+      "learning_rate": 5.124999999999999e-06,
+      "loss": 0.1082,
+      "step": 8775
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 3.051637887954712,
+      "learning_rate": 5.020833333333333e-06,
+      "loss": 0.1218,
+      "step": 8800
+    },
+    {
+      "epoch": 0.8825,
+      "grad_norm": 3.9986276626586914,
+      "learning_rate": 4.9166666666666665e-06,
+      "loss": 0.1534,
+      "step": 8825
+    },
+    {
+      "epoch": 0.885,
+      "grad_norm": 3.7295081615448,
+      "learning_rate": 4.812499999999999e-06,
+      "loss": 0.1498,
+      "step": 8850
+    },
+    {
+      "epoch": 0.8875,
+      "grad_norm": 3.2010765075683594,
+      "learning_rate": 4.708333333333333e-06,
+      "loss": 0.1514,
+      "step": 8875
+    },
+    {
+      "epoch": 0.89,
+      "grad_norm": 3.19004487991333,
+      "learning_rate": 4.6041666666666665e-06,
+      "loss": 0.1095,
+      "step": 8900
+    },
+    {
+      "epoch": 0.8925,
+      "grad_norm": 3.772831916809082,
+      "learning_rate": 4.499999999999999e-06,
+      "loss": 0.1066,
+      "step": 8925
+    },
+    {
+      "epoch": 0.895,
+      "grad_norm": 2.938344717025757,
+      "learning_rate": 4.395833333333333e-06,
+      "loss": 0.1156,
+      "step": 8950
+    },
+    {
+      "epoch": 0.8975,
+      "grad_norm": 3.13283109664917,
+      "learning_rate": 4.2916666666666665e-06,
+      "loss": 0.0922,
+      "step": 8975
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 2.813206672668457,
+      "learning_rate": 4.187499999999999e-06,
+      "loss": 0.0951,
+      "step": 9000
+    },
+    {
+      "epoch": 0.9,
+      "eval_loss": 0.2976318597793579,
+      "eval_runtime": 59.2686,
+      "eval_samples_per_second": 35.499,
+      "eval_steps_per_second": 2.227,
+      "eval_wer": 15.294007665700663,
+      "step": 9000
+    },
+    {
+      "epoch": 0.9025,
+      "grad_norm": 3.3851749897003174,
+      "learning_rate": 4.083333333333333e-06,
+      "loss": 0.1113,
+      "step": 9025
+    },
+    {
+      "epoch": 0.905,
+      "grad_norm": 3.7882187366485596,
+      "learning_rate": 3.9791666666666665e-06,
+      "loss": 0.1345,
+      "step": 9050
+    },
+    {
+      "epoch": 0.9075,
+      "grad_norm": 4.723577976226807,
+      "learning_rate": 3.874999999999999e-06,
+      "loss": 0.1739,
+      "step": 9075
+    },
+    {
+      "epoch": 0.91,
+      "grad_norm": 3.7890665531158447,
+      "learning_rate": 3.770833333333333e-06,
+      "loss": 0.173,
+      "step": 9100
+    },
+    {
+      "epoch": 0.9125,
+      "grad_norm": 3.224851608276367,
+      "learning_rate": 3.6666666666666666e-06,
+      "loss": 0.1381,
+      "step": 9125
+    },
+    {
+      "epoch": 0.915,
+      "grad_norm": 2.4666051864624023,
+      "learning_rate": 3.5624999999999998e-06,
+      "loss": 0.1291,
+      "step": 9150
+    },
+    {
+      "epoch": 0.9175,
+      "grad_norm": 3.090360403060913,
+      "learning_rate": 3.458333333333333e-06,
+      "loss": 0.1249,
+      "step": 9175
+    },
+    {
+      "epoch": 0.92,
+      "grad_norm": 4.420276641845703,
+      "learning_rate": 3.354166666666666e-06,
+      "loss": 0.1292,
+      "step": 9200
+    },
+    {
+      "epoch": 0.9225,
+      "grad_norm": 4.0348663330078125,
+      "learning_rate": 3.25e-06,
+      "loss": 0.1508,
+      "step": 9225
+    },
+    {
+      "epoch": 0.925,
+      "grad_norm": 4.426786422729492,
+      "learning_rate": 3.145833333333333e-06,
+      "loss": 0.1749,
+      "step": 9250
+    },
+    {
+      "epoch": 0.9275,
+      "grad_norm": 3.2825982570648193,
+      "learning_rate": 3.041666666666666e-06,
+      "loss": 0.1932,
+      "step": 9275
+    },
+    {
+      "epoch": 0.93,
+      "grad_norm": 2.628441572189331,
+      "learning_rate": 2.9375e-06,
+      "loss": 0.1182,
+      "step": 9300
+    },
+    {
+      "epoch": 0.9325,
+      "grad_norm": 2.464374303817749,
+      "learning_rate": 2.833333333333333e-06,
+      "loss": 0.1196,
+      "step": 9325
+    },
+    {
+      "epoch": 0.935,
+      "grad_norm": 2.491689682006836,
+      "learning_rate": 2.7291666666666662e-06,
+      "loss": 0.0968,
+      "step": 9350
+    },
+    {
+      "epoch": 0.9375,
+      "grad_norm": 3.4698681831359863,
+      "learning_rate": 2.625e-06,
+      "loss": 0.1251,
+      "step": 9375
+    },
+    {
+      "epoch": 0.94,
+      "grad_norm": 3.9469587802886963,
+      "learning_rate": 2.520833333333333e-06,
+      "loss": 0.1507,
+      "step": 9400
+    },
+    {
+      "epoch": 0.9425,
+      "grad_norm": 3.5354082584381104,
+      "learning_rate": 2.4166666666666663e-06,
+      "loss": 0.169,
+      "step": 9425
+    },
+    {
+      "epoch": 0.945,
+      "grad_norm": 2.7507169246673584,
+      "learning_rate": 2.3125e-06,
+      "loss": 0.2939,
+      "step": 9450
+    },
+    {
+      "epoch": 0.9475,
+      "grad_norm": 3.2346575260162354,
+      "learning_rate": 2.208333333333333e-06,
+      "loss": 0.1602,
+      "step": 9475
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 4.097684860229492,
+      "learning_rate": 2.1041666666666667e-06,
+      "loss": 0.134,
+      "step": 9500
+    },
+    {
+      "epoch": 0.9525,
+      "grad_norm": 4.1434855461120605,
+      "learning_rate": 2e-06,
+      "loss": 0.1553,
+      "step": 9525
+    },
+    {
+      "epoch": 0.955,
+      "grad_norm": 3.8769595623016357,
+      "learning_rate": 1.8958333333333331e-06,
+      "loss": 0.171,
+      "step": 9550
+    },
+    {
+      "epoch": 0.9575,
+      "grad_norm": 4.308934211730957,
+      "learning_rate": 1.7916666666666665e-06,
+      "loss": 0.1935,
+      "step": 9575
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 3.0821316242218018,
+      "learning_rate": 1.6874999999999997e-06,
+      "loss": 0.1496,
+      "step": 9600
+    },
+    {
+      "epoch": 0.9625,
+      "grad_norm": 4.086252689361572,
+      "learning_rate": 1.5833333333333331e-06,
+      "loss": 0.1223,
+      "step": 9625
+    },
+    {
+      "epoch": 0.965,
+      "grad_norm": 2.3455817699432373,
+      "learning_rate": 1.4791666666666663e-06,
+      "loss": 0.0933,
+      "step": 9650
+    },
+    {
+      "epoch": 0.9675,
+      "grad_norm": 2.302103042602539,
+      "learning_rate": 1.375e-06,
+      "loss": 0.0959,
+      "step": 9675
+    },
+    {
+      "epoch": 0.97,
+      "grad_norm": 2.8981809616088867,
+      "learning_rate": 1.2708333333333334e-06,
+      "loss": 0.0963,
+      "step": 9700
+    },
+    {
+      "epoch": 0.9725,
+      "grad_norm": 1.89694082736969,
+      "learning_rate": 1.1666666666666666e-06,
+      "loss": 0.096,
+      "step": 9725
+    },
+    {
+      "epoch": 0.975,
+      "grad_norm": 2.363546371459961,
+      "learning_rate": 1.0624999999999998e-06,
+      "loss": 0.1005,
+      "step": 9750
+    },
+    {
+      "epoch": 0.9775,
+      "grad_norm": 2.9393630027770996,
+      "learning_rate": 9.583333333333334e-07,
+      "loss": 0.11,
+      "step": 9775
+    },
+    {
+      "epoch": 0.98,
+      "grad_norm": 2.824051856994629,
+      "learning_rate": 8.541666666666666e-07,
+      "loss": 0.153,
+      "step": 9800
+    },
+    {
+      "epoch": 0.9825,
+      "grad_norm": 3.817542552947998,
+      "learning_rate": 7.499999999999999e-07,
+      "loss": 0.1647,
+      "step": 9825
+    },
+    {
+      "epoch": 0.985,
+      "grad_norm": 3.506502151489258,
+      "learning_rate": 6.458333333333332e-07,
+      "loss": 0.1499,
+      "step": 9850
+    },
+    {
+      "epoch": 0.9875,
+      "grad_norm": 2.9996325969696045,
+      "learning_rate": 5.416666666666666e-07,
+      "loss": 0.148,
+      "step": 9875
+    },
+    {
+      "epoch": 0.99,
+      "grad_norm": 6.828978538513184,
+      "learning_rate": 4.375e-07,
+      "loss": 0.2303,
+      "step": 9900
+    },
+    {
+      "epoch": 0.9925,
+      "grad_norm": 2.7112486362457275,
+      "learning_rate": 3.333333333333333e-07,
+      "loss": 0.195,
+      "step": 9925
+    },
+    {
+      "epoch": 0.995,
+      "grad_norm": 3.023608684539795,
+      "learning_rate": 2.2916666666666666e-07,
+      "loss": 0.106,
+      "step": 9950
+    },
+    {
+      "epoch": 0.9975,
+      "grad_norm": 2.6858067512512207,
+      "learning_rate": 1.25e-07,
+      "loss": 0.1,
+      "step": 9975
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 2.3674516677856445,
+      "learning_rate": 2.083333333333333e-08,
+      "loss": 0.106,
+      "step": 10000
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.2991788983345032,
+      "eval_runtime": 60.4166,
+      "eval_samples_per_second": 34.825,
+      "eval_steps_per_second": 2.185,
+      "eval_wer": 15.72403477610545,
+      "step": 10000
+    },
+    {
+      "epoch": 1.0,
+      "step": 10000,
+      "total_flos": 7.8780432384e+18,
+      "train_loss": 0.24547564173936845,
+      "train_runtime": 3924.8092,
+      "train_samples_per_second": 81.533,
+      "train_steps_per_second": 2.548
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 10000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 9223372036854775807,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.8780432384e+18,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

wandb/run-20250217_214618-7bygcjmf/files/output.log CHANGED Viewed

@@ -1901,3 +1901,161 @@ Training completed. Do not forget to share your model on huggingface.co/models =
 [INFO|feature_extraction_utils.py:437] 2025-02-17 22:51:55,409 >> Feature extractor saved in ./preprocessor_config.json
 [INFO|modelcard.py:449] 2025-02-17 22:51:55,555 >> Dropping the following result as it does not have all the necessary fields:
 {'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'metrics': [{'name': 'Wer', 'type': 'wer', 'value': 15.72403477610545}]}

 [INFO|feature_extraction_utils.py:437] 2025-02-17 22:51:55,409 >> Feature extractor saved in ./preprocessor_config.json
 [INFO|modelcard.py:449] 2025-02-17 22:51:55,555 >> Dropping the following result as it does not have all the necessary fields:
 {'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'metrics': [{'name': 'Wer', 'type': 'wer', 'value': 15.72403477610545}]}
+***** train metrics *****
+  epoch                    =          1.0
+  total_flos               = 7336999511GF
+  train_loss               =       0.2455
+  train_runtime            =   1:05:24.80
+  train_samples_per_second =       81.533
+  train_steps_per_second   =        2.548
+02/17/2025 22:52:00 - INFO - __main__ - *** Evaluate ***
+[INFO|trainer.py:4258] 2025-02-17 22:52:00,618 >>
+***** Running Evaluation *****
+[INFO|trainer.py:4262] 2025-02-17 22:52:00,618 >>   Num examples: Unknown
+[INFO|trainer.py:4263] 2025-02-17 22:52:00,618 >>   Batch size = 16
+[INFO|trainer_utils.py:837] 2025-02-17 22:52:07,406 >> The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:07,558 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:07,971 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:08,460 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:08,998 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:09,391 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:09,739 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:10,053 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:10,390 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:10,747 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:11,158 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:11,489 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:11,920 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:12,311 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:12,740 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:13,149 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:13,473 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:13,756 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:14,117 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:14,472 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:14,777 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:15,180 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:15,582 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:15,959 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:16,331 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:16,652 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:17,044 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:17,423 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:17,858 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:18,193 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:18,522 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:18,963 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:19,333 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:19,737 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:20,033 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:20,365 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:20,686 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:20,973 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:21,303 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:21,617 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:21,910 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:22,221 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:22,523 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:22,801 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:23,099 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:23,388 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:23,728 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:24,027 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:24,309 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:24,703 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:25,003 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:25,312 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:25,620 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:25,884 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:26,185 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:26,482 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:26,815 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:27,073 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:27,339 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:27,654 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:27,920 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:28,245 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:28,534 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:28,844 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:29,140 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:29,396 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:29,640 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:29,941 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:30,234 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:30,533 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:30,817 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:31,106 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:31,443 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:31,731 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:32,072 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:32,372 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:32,682 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:32,944 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:33,233 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:33,533 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:33,814 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:34,097 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:34,373 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:34,669 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:34,984 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:35,296 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:35,573 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:35,871 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:36,225 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:36,541 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:36,910 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:37,225 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:37,535 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:37,824 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:38,106 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:38,452 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:38,768 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:39,069 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:39,373 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:39,677 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:39,986 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:40,291 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:40,622 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:40,924 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:41,251 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:41,603 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:41,908 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:42,225 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:42,565 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:42,853 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:43,138 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:43,452 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:43,769 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:44,059 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:44,328 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:44,620 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:44,899 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:45,215 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:45,514 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:45,807 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:46,100 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:46,406 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:46,683 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:46,992 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:47,301 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:47,584 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:47,862 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:48,146 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:48,450 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:48,741 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:49,038 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:49,299 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+[INFO|generation_whisper.py:1844] 2025-02-17 22:52:49,536 >> Increase max_length from 225 to 228 since input is conditioned on previous segment.
+***** eval metrics *****
+  epoch                   =        1.0
+  eval_loss               =     0.3002
+  eval_runtime            = 0:00:56.93
+  eval_samples_per_second =     36.956
+  eval_steps_per_second   =      2.319
+  eval_wer                =    14.9855
+[INFO|trainer.py:3942] 2025-02-17 22:52:57,551 >> Saving model checkpoint to ./
+[INFO|configuration_utils.py:423] 2025-02-17 22:52:57,552 >> Configuration saved in ./config.json
+[INFO|configuration_utils.py:909] 2025-02-17 22:52:57,553 >> Configuration saved in ./generation_config.json
+[INFO|modeling_utils.py:3040] 2025-02-17 22:52:57,945 >> Model weights saved in ./model.safetensors
+[INFO|feature_extraction_utils.py:437] 2025-02-17 22:52:57,947 >> Feature extractor saved in ./preprocessor_config.json
+run-7bygcjmf.wandb: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.98M/4.98M [00:01<00:00, 3.58MB/s]

wandb/run-20250217_214618-7bygcjmf/run-7bygcjmf.wandb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b8a57613cf10725c6769a6e7ad801d52e9a486bfd327b974ae5e778d6cdd2a6
-size 4947968

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b24350895068d2eeaab4073bdf0d0a76ef0d5ec72269f30f06e8afe5fbb6315
+size 4980736