anantoj commited on
Commit
8654803
1 Parent(s): de79e0d

End of training

Browse files
Files changed (5) hide show
  1. all_results.json +14 -0
  2. eval_results.json +9 -0
  3. nohup.out +23 -0
  4. train_results.json +8 -0
  5. trainer_state.json +2728 -0
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "eval_loss": 0.06388434022665024,
4
+ "eval_runtime": 56.5665,
5
+ "eval_samples": 456,
6
+ "eval_samples_per_second": 8.061,
7
+ "eval_steps_per_second": 1.008,
8
+ "eval_wer": 0.044850498338870434,
9
+ "train_loss": 1.0736439298451375,
10
+ "train_runtime": 194988.743,
11
+ "train_samples": 22262,
12
+ "train_samples_per_second": 5.709,
13
+ "train_steps_per_second": 0.178
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "eval_loss": 0.06388434022665024,
4
+ "eval_runtime": 56.5665,
5
+ "eval_samples": 456,
6
+ "eval_samples_per_second": 8.061,
7
+ "eval_steps_per_second": 1.008,
8
+ "eval_wer": 0.044850498338870434
9
+ }
nohup.out CHANGED
@@ -5713,3 +5713,26 @@ The progress bars may be unreliable.
5713
 
5714
  Dropping the following result as it does not have all the necessary fields:
5715
  {'dataset': {'name': 'zeroth_korean_asr', 'type': 'zeroth_korean_asr', 'args': 'clean'}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5716
  0%| | 0/57 [00:00<?, ?it/s]
5717
  4%|▎ | 2/57 [00:01<00:27, 1.99it/s]
5718
  5%|▌ | 3/57 [00:01<00:35, 1.50it/s]
5719
  7%|▋ | 4/57 [00:02<00:40, 1.29it/s]
5720
  9%|▉ | 5/57 [00:03<00:39, 1.31it/s]
5721
  11%|█ | 6/57 [00:04<00:40, 1.24it/s]
5722
  12%|█▏ | 7/57 [00:05<00:40, 1.22it/s]
5723
  14%|█▍ | 8/57 [00:06<00:40, 1.21it/s]
5724
  16%|█▌ | 9/57 [00:06<00:38, 1.24it/s]
5725
  18%|█▊ | 10/57 [00:07<00:37, 1.25it/s]
5726
  19%|█▉ | 11/57 [00:08<00:38, 1.20it/s]
5727
  21%|██ | 12/57 [00:09<00:39, 1.13it/s]
5728
  23%|██▎ | 13/57 [00:10<00:42, 1.05it/s]
5729
  25%|██▍ | 14/57 [00:11<00:40, 1.05it/s]
5730
  26%|██▋ | 15/57 [00:12<00:42, 1.00s/it]
5731
  28%|██▊ | 16/57 [00:13<00:41, 1.00s/it]
5732
  30%|██▉ | 17/57 [00:14<00:39, 1.01it/s]
5733
  32%|███▏ | 18/57 [00:15<00:37, 1.04it/s]
5734
  33%|███▎ | 19/57 [00:16<00:37, 1.02it/s]
5735
  35%|███▌ | 20/57 [00:17<00:35, 1.05it/s]
5736
  37%|███▋ | 21/57 [00:18<00:34, 1.04it/s]
5737
  39%|███▊ | 22/57 [00:19<00:34, 1.02it/s]
5738
  40%|████ | 23/57 [00:20<00:32, 1.06it/s]
5739
  42%|████▏ | 24/57 [00:21<00:30, 1.09it/s]
5740
  44%|████▍ | 25/57 [00:22<00:27, 1.16it/s]
5741
  46%|████▌ | 26/57 [00:22<00:24, 1.26it/s]
5742
  47%|████▋ | 27/57 [00:23<00:25, 1.18it/s]
5743
  49%|████▉ | 28/57 [00:24<00:26, 1.08it/s]
5744
  51%|█████ | 29/57 [00:25<00:28, 1.00s/it]
5745
  53%|█████▎ | 30/57 [00:26<00:26, 1.01it/s]
5746
  54%|█████▍ | 31/57 [00:28<00:28, 1.08s/it]
5747
  56%|█████▌ | 32/57 [00:28<00:24, 1.00it/s]
5748
  58%|█████▊ | 33/57 [00:30<00:24, 1.02s/it]
5749
  60%|█████▉ | 34/57 [00:31<00:23, 1.02s/it]
5750
  61%|██████▏ | 35/57 [00:31<00:21, 1.04it/s]
5751
  63%|██████▎ | 36/57 [00:32<00:20, 1.00it/s]
5752
  65%|██████▍ | 37/57 [00:33<00:17, 1.13it/s]
5753
  67%|██████▋ | 38/57 [00:34<00:18, 1.04it/s]
5754
  68%|██████▊ | 39/57 [00:35<00:16, 1.10it/s]
5755
  70%|███████ | 40/57 [00:36<00:16, 1.00it/s]
5756
  72%|███████▏ | 41/57 [00:37<00:16, 1.04s/it]
5757
  74%|███████▎ | 42/57 [00:38<00:14, 1.00it/s]
5758
  75%|███████▌ | 43/57 [00:39<00:13, 1.06it/s]
5759
  77%|███████▋ | 44/57 [00:40<00:11, 1.12it/s]
5760
  79%|███████▉ | 45/57 [00:41<00:11, 1.08it/s]
5761
  81%|████████ | 46/57 [00:42<00:10, 1.07it/s]
5762
  82%|████████▏ | 47/57 [00:42<00:08, 1.18it/s]
5763
  84%|████████▍ | 48/57 [00:43<00:07, 1.23it/s]
5764
  86%|████████▌ | 49/57 [00:44<00:06, 1.22it/s]
5765
  88%|████████▊ | 50/57 [00:45<00:05, 1.21it/s]
5766
  89%|████████▉ | 51/57 [00:46<00:04, 1.21it/s]
5767
  91%|█████████ | 52/57 [00:46<00:04, 1.22it/s]
5768
  93%|█████████▎| 53/57 [00:47<00:03, 1.22it/s]
5769
  95%|█████████▍| 54/57 [00:48<00:02, 1.33it/s]
5770
  96%|█████████▋| 55/57 [00:49<00:01, 1.38it/s]
5771
  98%|█████████▊| 56/57 [00:49<00:00, 1.38it/s]
 
 
 
 
 
5713
 
5714
  Dropping the following result as it does not have all the necessary fields:
5715
  {'dataset': {'name': 'zeroth_korean_asr', 'type': 'zeroth_korean_asr', 'args': 'clean'}}
5716
+ To https://huggingface.co/anantoj/wav2vec2-xls-r-1b-korean
5717
+ 3233080..de79e0d main -> main
5718
+
5719
+ 02/02/2022 17:38:45 - WARNING - huggingface_hub.repository - To https://huggingface.co/anantoj/wav2vec2-xls-r-1b-korean
5720
+ 3233080..de79e0d main -> main
5721
+
5722
+ ***** train metrics *****
5723
+ epoch = 50.0
5724
+ train_loss = 1.0736
5725
+ train_runtime = 2 days, 6:09:48.74
5726
+ train_samples = 22262
5727
+ train_samples_per_second = 5.709
5728
+ train_steps_per_second = 0.178
5729
+ 02/02/2022 17:38:48 - INFO - __main__ - *** Evaluate ***
5730
+ The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.
5731
+ ***** Running Evaluation *****
5732
+ Num examples = 456
5733
+ Batch size = 8
5734
+
5735
  0%| | 0/57 [00:00<?, ?it/s]
5736
  4%|▎ | 2/57 [00:01<00:27, 1.99it/s]
5737
  5%|▌ | 3/57 [00:01<00:35, 1.50it/s]
5738
  7%|▋ | 4/57 [00:02<00:40, 1.29it/s]
5739
  9%|▉ | 5/57 [00:03<00:39, 1.31it/s]
5740
  11%|█ | 6/57 [00:04<00:40, 1.24it/s]
5741
  12%|█▏ | 7/57 [00:05<00:40, 1.22it/s]
5742
  14%|█▍ | 8/57 [00:06<00:40, 1.21it/s]
5743
  16%|█▌ | 9/57 [00:06<00:38, 1.24it/s]
5744
  18%|█▊ | 10/57 [00:07<00:37, 1.25it/s]
5745
  19%|█▉ | 11/57 [00:08<00:38, 1.20it/s]
5746
  21%|██ | 12/57 [00:09<00:39, 1.13it/s]
5747
  23%|██▎ | 13/57 [00:10<00:42, 1.05it/s]
5748
  25%|██▍ | 14/57 [00:11<00:40, 1.05it/s]
5749
  26%|██▋ | 15/57 [00:12<00:42, 1.00s/it]
5750
  28%|██▊ | 16/57 [00:13<00:41, 1.00s/it]
5751
  30%|██▉ | 17/57 [00:14<00:39, 1.01it/s]
5752
  32%|███▏ | 18/57 [00:15<00:37, 1.04it/s]
5753
  33%|███▎ | 19/57 [00:16<00:37, 1.02it/s]
5754
  35%|███▌ | 20/57 [00:17<00:35, 1.05it/s]
5755
  37%|███▋ | 21/57 [00:18<00:34, 1.04it/s]
5756
  39%|███▊ | 22/57 [00:19<00:34, 1.02it/s]
5757
  40%|████ | 23/57 [00:20<00:32, 1.06it/s]
5758
  42%|████▏ | 24/57 [00:21<00:30, 1.09it/s]
5759
  44%|████▍ | 25/57 [00:22<00:27, 1.16it/s]
5760
  46%|████▌ | 26/57 [00:22<00:24, 1.26it/s]
5761
  47%|████▋ | 27/57 [00:23<00:25, 1.18it/s]
5762
  49%|████▉ | 28/57 [00:24<00:26, 1.08it/s]
5763
  51%|█████ | 29/57 [00:25<00:28, 1.00s/it]
5764
  53%|█████▎ | 30/57 [00:26<00:26, 1.01it/s]
5765
  54%|█████▍ | 31/57 [00:28<00:28, 1.08s/it]
5766
  56%|█████▌ | 32/57 [00:28<00:24, 1.00it/s]
5767
  58%|█████▊ | 33/57 [00:30<00:24, 1.02s/it]
5768
  60%|█████▉ | 34/57 [00:31<00:23, 1.02s/it]
5769
  61%|██████▏ | 35/57 [00:31<00:21, 1.04it/s]
5770
  63%|██████▎ | 36/57 [00:32<00:20, 1.00it/s]
5771
  65%|██████▍ | 37/57 [00:33<00:17, 1.13it/s]
5772
  67%|██████▋ | 38/57 [00:34<00:18, 1.04it/s]
5773
  68%|██████▊ | 39/57 [00:35<00:16, 1.10it/s]
5774
  70%|███████ | 40/57 [00:36<00:16, 1.00it/s]
5775
  72%|███████▏ | 41/57 [00:37<00:16, 1.04s/it]
5776
  74%|███████▎ | 42/57 [00:38<00:14, 1.00it/s]
5777
  75%|███████▌ | 43/57 [00:39<00:13, 1.06it/s]
5778
  77%|███████▋ | 44/57 [00:40<00:11, 1.12it/s]
5779
  79%|███████▉ | 45/57 [00:41<00:11, 1.08it/s]
5780
  81%|████████ | 46/57 [00:42<00:10, 1.07it/s]
5781
  82%|████████▏ | 47/57 [00:42<00:08, 1.18it/s]
5782
  84%|████████▍ | 48/57 [00:43<00:07, 1.23it/s]
5783
  86%|████████▌ | 49/57 [00:44<00:06, 1.22it/s]
5784
  88%|████████▊ | 50/57 [00:45<00:05, 1.21it/s]
5785
  89%|████████▉ | 51/57 [00:46<00:04, 1.21it/s]
5786
  91%|█████████ | 52/57 [00:46<00:04, 1.22it/s]
5787
  93%|█████████▎| 53/57 [00:47<00:03, 1.22it/s]
5788
  95%|█████████▍| 54/57 [00:48<00:02, 1.33it/s]
5789
  96%|█████████▋| 55/57 [00:49<00:01, 1.38it/s]
5790
  98%|█████████▊| 56/57 [00:49<00:00, 1.38it/s]
5791
+ Saving model checkpoint to ./
5792
+ Configuration saved in ./config.json
5793
+ Model weights saved in ./pytorch_model.bin
5794
+ Configuration saved in ./preprocessor_config.json
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "train_loss": 1.0736439298451375,
4
+ "train_runtime": 194988.743,
5
+ "train_samples": 22262,
6
+ "train_samples_per_second": 5.709,
7
+ "train_steps_per_second": 0.178
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,2728 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 49.99892202659001,
5
+ "global_step": 34750,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.14,
12
+ "learning_rate": 3.6e-06,
13
+ "loss": 27.8845,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.29,
18
+ "learning_rate": 7.35e-06,
19
+ "loss": 5.2607,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.43,
24
+ "learning_rate": 1.1099999999999999e-05,
25
+ "loss": 4.8397,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 0.57,
30
+ "learning_rate": 1.485e-05,
31
+ "loss": 4.6751,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 0.72,
36
+ "learning_rate": 1.8599999999999998e-05,
37
+ "loss": 4.603,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 0.72,
42
+ "eval_loss": 4.657182693481445,
43
+ "eval_runtime": 61.7478,
44
+ "eval_samples_per_second": 7.385,
45
+ "eval_steps_per_second": 0.923,
46
+ "eval_wer": 0.9984898822108125,
47
+ "step": 500
48
+ },
49
+ {
50
+ "epoch": 0.86,
51
+ "learning_rate": 2.2349999999999998e-05,
52
+ "loss": 4.5246,
53
+ "step": 600
54
+ },
55
+ {
56
+ "epoch": 1.01,
57
+ "learning_rate": 2.6099999999999997e-05,
58
+ "loss": 3.8527,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 1.15,
63
+ "learning_rate": 2.985e-05,
64
+ "loss": 3.0344,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 1.29,
69
+ "learning_rate": 3.36e-05,
70
+ "loss": 2.7616,
71
+ "step": 900
72
+ },
73
+ {
74
+ "epoch": 1.44,
75
+ "learning_rate": 3.735e-05,
76
+ "loss": 2.6314,
77
+ "step": 1000
78
+ },
79
+ {
80
+ "epoch": 1.44,
81
+ "eval_loss": 2.0424230098724365,
82
+ "eval_runtime": 57.8725,
83
+ "eval_samples_per_second": 7.879,
84
+ "eval_steps_per_second": 0.985,
85
+ "eval_wer": 0.9255511929930534,
86
+ "step": 1000
87
+ },
88
+ {
89
+ "epoch": 1.58,
90
+ "learning_rate": 4.11e-05,
91
+ "loss": 2.4936,
92
+ "step": 1100
93
+ },
94
+ {
95
+ "epoch": 1.73,
96
+ "learning_rate": 4.484999999999999e-05,
97
+ "loss": 2.4557,
98
+ "step": 1200
99
+ },
100
+ {
101
+ "epoch": 1.87,
102
+ "learning_rate": 4.8599999999999995e-05,
103
+ "loss": 2.3667,
104
+ "step": 1300
105
+ },
106
+ {
107
+ "epoch": 2.01,
108
+ "learning_rate": 5.234999999999999e-05,
109
+ "loss": 2.3147,
110
+ "step": 1400
111
+ },
112
+ {
113
+ "epoch": 2.16,
114
+ "learning_rate": 5.6099999999999995e-05,
115
+ "loss": 2.2708,
116
+ "step": 1500
117
+ },
118
+ {
119
+ "epoch": 2.16,
120
+ "eval_loss": 0.9889274835586548,
121
+ "eval_runtime": 59.1503,
122
+ "eval_samples_per_second": 7.709,
123
+ "eval_steps_per_second": 0.964,
124
+ "eval_wer": 0.6988825128360012,
125
+ "step": 1500
126
+ },
127
+ {
128
+ "epoch": 2.3,
129
+ "learning_rate": 5.985e-05,
130
+ "loss": 2.1797,
131
+ "step": 1600
132
+ },
133
+ {
134
+ "epoch": 2.45,
135
+ "learning_rate": 6.359999999999999e-05,
136
+ "loss": 2.1628,
137
+ "step": 1700
138
+ },
139
+ {
140
+ "epoch": 2.59,
141
+ "learning_rate": 6.735e-05,
142
+ "loss": 2.1662,
143
+ "step": 1800
144
+ },
145
+ {
146
+ "epoch": 2.73,
147
+ "learning_rate": 7.11e-05,
148
+ "loss": 2.1623,
149
+ "step": 1900
150
+ },
151
+ {
152
+ "epoch": 2.88,
153
+ "learning_rate": 7.484999999999999e-05,
154
+ "loss": 2.1769,
155
+ "step": 2000
156
+ },
157
+ {
158
+ "epoch": 2.88,
159
+ "eval_loss": 0.8366215825080872,
160
+ "eval_runtime": 56.673,
161
+ "eval_samples_per_second": 8.046,
162
+ "eval_steps_per_second": 1.006,
163
+ "eval_wer": 0.6312292358803987,
164
+ "step": 2000
165
+ },
166
+ {
167
+ "epoch": 3.02,
168
+ "learning_rate": 7.478015267175572e-05,
169
+ "loss": 2.1913,
170
+ "step": 2100
171
+ },
172
+ {
173
+ "epoch": 3.17,
174
+ "learning_rate": 7.455114503816793e-05,
175
+ "loss": 2.1906,
176
+ "step": 2200
177
+ },
178
+ {
179
+ "epoch": 3.31,
180
+ "learning_rate": 7.432213740458014e-05,
181
+ "loss": 2.1964,
182
+ "step": 2300
183
+ },
184
+ {
185
+ "epoch": 3.45,
186
+ "learning_rate": 7.409312977099236e-05,
187
+ "loss": 2.1548,
188
+ "step": 2400
189
+ },
190
+ {
191
+ "epoch": 3.6,
192
+ "learning_rate": 7.386412213740458e-05,
193
+ "loss": 2.1142,
194
+ "step": 2500
195
+ },
196
+ {
197
+ "epoch": 3.6,
198
+ "eval_loss": 0.7555108070373535,
199
+ "eval_runtime": 57.6601,
200
+ "eval_samples_per_second": 7.908,
201
+ "eval_steps_per_second": 0.989,
202
+ "eval_wer": 0.5998187858652975,
203
+ "step": 2500
204
+ },
205
+ {
206
+ "epoch": 3.74,
207
+ "learning_rate": 7.363511450381678e-05,
208
+ "loss": 2.1028,
209
+ "step": 2600
210
+ },
211
+ {
212
+ "epoch": 3.88,
213
+ "learning_rate": 7.3406106870229e-05,
214
+ "loss": 2.0524,
215
+ "step": 2700
216
+ },
217
+ {
218
+ "epoch": 4.03,
219
+ "learning_rate": 7.317709923664121e-05,
220
+ "loss": 2.0426,
221
+ "step": 2800
222
+ },
223
+ {
224
+ "epoch": 4.17,
225
+ "learning_rate": 7.29503816793893e-05,
226
+ "loss": 1.9613,
227
+ "step": 2900
228
+ },
229
+ {
230
+ "epoch": 4.32,
231
+ "learning_rate": 7.272137404580152e-05,
232
+ "loss": 2.0084,
233
+ "step": 3000
234
+ },
235
+ {
236
+ "epoch": 4.32,
237
+ "eval_loss": 0.7143925428390503,
238
+ "eval_runtime": 57.4263,
239
+ "eval_samples_per_second": 7.941,
240
+ "eval_steps_per_second": 0.993,
241
+ "eval_wer": 0.6002718212020538,
242
+ "step": 3000
243
+ },
244
+ {
245
+ "epoch": 4.46,
246
+ "learning_rate": 7.249236641221373e-05,
247
+ "loss": 2.0063,
248
+ "step": 3100
249
+ },
250
+ {
251
+ "epoch": 4.6,
252
+ "learning_rate": 7.226335877862594e-05,
253
+ "loss": 1.9758,
254
+ "step": 3200
255
+ },
256
+ {
257
+ "epoch": 4.75,
258
+ "learning_rate": 7.203435114503816e-05,
259
+ "loss": 1.9313,
260
+ "step": 3300
261
+ },
262
+ {
263
+ "epoch": 4.89,
264
+ "learning_rate": 7.180534351145037e-05,
265
+ "loss": 1.9009,
266
+ "step": 3400
267
+ },
268
+ {
269
+ "epoch": 5.04,
270
+ "learning_rate": 7.15763358778626e-05,
271
+ "loss": 1.9272,
272
+ "step": 3500
273
+ },
274
+ {
275
+ "epoch": 5.04,
276
+ "eval_loss": 0.631069004535675,
277
+ "eval_runtime": 58.3633,
278
+ "eval_samples_per_second": 7.813,
279
+ "eval_steps_per_second": 0.977,
280
+ "eval_wer": 0.5460585925702205,
281
+ "step": 3500
282
+ },
283
+ {
284
+ "epoch": 5.18,
285
+ "learning_rate": 7.13473282442748e-05,
286
+ "loss": 1.8728,
287
+ "step": 3600
288
+ },
289
+ {
290
+ "epoch": 5.32,
291
+ "learning_rate": 7.111832061068701e-05,
292
+ "loss": 1.8675,
293
+ "step": 3700
294
+ },
295
+ {
296
+ "epoch": 5.47,
297
+ "learning_rate": 7.088931297709923e-05,
298
+ "loss": 1.8549,
299
+ "step": 3800
300
+ },
301
+ {
302
+ "epoch": 5.61,
303
+ "learning_rate": 7.066030534351145e-05,
304
+ "loss": 1.8491,
305
+ "step": 3900
306
+ },
307
+ {
308
+ "epoch": 5.75,
309
+ "learning_rate": 7.043358778625953e-05,
310
+ "loss": 1.8687,
311
+ "step": 4000
312
+ },
313
+ {
314
+ "epoch": 5.75,
315
+ "eval_loss": 0.6252322196960449,
316
+ "eval_runtime": 56.9578,
317
+ "eval_samples_per_second": 8.006,
318
+ "eval_steps_per_second": 1.001,
319
+ "eval_wer": 0.5430383569918453,
320
+ "step": 4000
321
+ },
322
+ {
323
+ "epoch": 5.9,
324
+ "learning_rate": 7.020458015267175e-05,
325
+ "loss": 1.8377,
326
+ "step": 4100
327
+ },
328
+ {
329
+ "epoch": 6.04,
330
+ "learning_rate": 6.997557251908396e-05,
331
+ "loss": 1.8436,
332
+ "step": 4200
333
+ },
334
+ {
335
+ "epoch": 6.19,
336
+ "learning_rate": 6.974656488549617e-05,
337
+ "loss": 1.7895,
338
+ "step": 4300
339
+ },
340
+ {
341
+ "epoch": 6.33,
342
+ "learning_rate": 6.951755725190839e-05,
343
+ "loss": 1.8045,
344
+ "step": 4400
345
+ },
346
+ {
347
+ "epoch": 6.47,
348
+ "learning_rate": 6.928854961832061e-05,
349
+ "loss": 1.8186,
350
+ "step": 4500
351
+ },
352
+ {
353
+ "epoch": 6.47,
354
+ "eval_loss": 0.5491229891777039,
355
+ "eval_runtime": 56.8943,
356
+ "eval_samples_per_second": 8.015,
357
+ "eval_steps_per_second": 1.002,
358
+ "eval_wer": 0.49879190576864996,
359
+ "step": 4500
360
+ },
361
+ {
362
+ "epoch": 6.62,
363
+ "learning_rate": 6.905954198473281e-05,
364
+ "loss": 1.79,
365
+ "step": 4600
366
+ },
367
+ {
368
+ "epoch": 6.76,
369
+ "learning_rate": 6.883053435114503e-05,
370
+ "loss": 1.8018,
371
+ "step": 4700
372
+ },
373
+ {
374
+ "epoch": 6.91,
375
+ "learning_rate": 6.860152671755724e-05,
376
+ "loss": 1.7966,
377
+ "step": 4800
378
+ },
379
+ {
380
+ "epoch": 7.05,
381
+ "learning_rate": 6.837251908396947e-05,
382
+ "loss": 1.7979,
383
+ "step": 4900
384
+ },
385
+ {
386
+ "epoch": 7.19,
387
+ "learning_rate": 6.814351145038167e-05,
388
+ "loss": 1.7364,
389
+ "step": 5000
390
+ },
391
+ {
392
+ "epoch": 7.19,
393
+ "eval_loss": 0.5463064908981323,
394
+ "eval_runtime": 56.8201,
395
+ "eval_samples_per_second": 8.025,
396
+ "eval_steps_per_second": 1.003,
397
+ "eval_wer": 0.4959226819691936,
398
+ "step": 5000
399
+ },
400
+ {
401
+ "epoch": 7.34,
402
+ "learning_rate": 6.791450381679388e-05,
403
+ "loss": 1.7467,
404
+ "step": 5100
405
+ },
406
+ {
407
+ "epoch": 7.48,
408
+ "learning_rate": 6.76854961832061e-05,
409
+ "loss": 1.7082,
410
+ "step": 5200
411
+ },
412
+ {
413
+ "epoch": 7.63,
414
+ "learning_rate": 6.745648854961832e-05,
415
+ "loss": 1.7084,
416
+ "step": 5300
417
+ },
418
+ {
419
+ "epoch": 7.77,
420
+ "learning_rate": 6.72297709923664e-05,
421
+ "loss": 1.6732,
422
+ "step": 5400
423
+ },
424
+ {
425
+ "epoch": 7.91,
426
+ "learning_rate": 6.700076335877863e-05,
427
+ "loss": 1.6809,
428
+ "step": 5500
429
+ },
430
+ {
431
+ "epoch": 7.91,
432
+ "eval_loss": 0.47242555022239685,
433
+ "eval_runtime": 56.3595,
434
+ "eval_samples_per_second": 8.091,
435
+ "eval_steps_per_second": 1.011,
436
+ "eval_wer": 0.4483539716097856,
437
+ "step": 5500
438
+ },
439
+ {
440
+ "epoch": 8.06,
441
+ "learning_rate": 6.677175572519083e-05,
442
+ "loss": 1.7139,
443
+ "step": 5600
444
+ },
445
+ {
446
+ "epoch": 8.2,
447
+ "learning_rate": 6.654274809160304e-05,
448
+ "loss": 1.708,
449
+ "step": 5700
450
+ },
451
+ {
452
+ "epoch": 8.34,
453
+ "learning_rate": 6.631374045801526e-05,
454
+ "loss": 1.6987,
455
+ "step": 5800
456
+ },
457
+ {
458
+ "epoch": 8.49,
459
+ "learning_rate": 6.608473282442748e-05,
460
+ "loss": 1.6705,
461
+ "step": 5900
462
+ },
463
+ {
464
+ "epoch": 8.63,
465
+ "learning_rate": 6.585572519083968e-05,
466
+ "loss": 1.641,
467
+ "step": 6000
468
+ },
469
+ {
470
+ "epoch": 8.63,
471
+ "eval_loss": 0.46793004870414734,
472
+ "eval_runtime": 56.3201,
473
+ "eval_samples_per_second": 8.097,
474
+ "eval_steps_per_second": 1.012,
475
+ "eval_wer": 0.44608879492600423,
476
+ "step": 6000
477
+ },
478
+ {
479
+ "epoch": 8.78,
480
+ "learning_rate": 6.56267175572519e-05,
481
+ "loss": 1.6277,
482
+ "step": 6100
483
+ },
484
+ {
485
+ "epoch": 8.92,
486
+ "learning_rate": 6.539770992366411e-05,
487
+ "loss": 1.6529,
488
+ "step": 6200
489
+ },
490
+ {
491
+ "epoch": 9.06,
492
+ "learning_rate": 6.516870229007634e-05,
493
+ "loss": 1.6195,
494
+ "step": 6300
495
+ },
496
+ {
497
+ "epoch": 9.21,
498
+ "learning_rate": 6.493969465648855e-05,
499
+ "loss": 1.5983,
500
+ "step": 6400
501
+ },
502
+ {
503
+ "epoch": 9.35,
504
+ "learning_rate": 6.471068702290075e-05,
505
+ "loss": 1.572,
506
+ "step": 6500
507
+ },
508
+ {
509
+ "epoch": 9.35,
510
+ "eval_loss": 0.4386586844921112,
511
+ "eval_runtime": 56.3246,
512
+ "eval_samples_per_second": 8.096,
513
+ "eval_steps_per_second": 1.012,
514
+ "eval_wer": 0.42358803986710963,
515
+ "step": 6500
516
+ },
517
+ {
518
+ "epoch": 9.5,
519
+ "learning_rate": 6.448167938931296e-05,
520
+ "loss": 1.5749,
521
+ "step": 6600
522
+ },
523
+ {
524
+ "epoch": 9.64,
525
+ "learning_rate": 6.425267175572519e-05,
526
+ "loss": 1.5561,
527
+ "step": 6700
528
+ },
529
+ {
530
+ "epoch": 9.78,
531
+ "learning_rate": 6.40236641221374e-05,
532
+ "loss": 1.559,
533
+ "step": 6800
534
+ },
535
+ {
536
+ "epoch": 9.93,
537
+ "learning_rate": 6.379465648854962e-05,
538
+ "loss": 1.5419,
539
+ "step": 6900
540
+ },
541
+ {
542
+ "epoch": 10.07,
543
+ "learning_rate": 6.356564885496182e-05,
544
+ "loss": 1.5256,
545
+ "step": 7000
546
+ },
547
+ {
548
+ "epoch": 10.07,
549
+ "eval_loss": 0.39700397849082947,
550
+ "eval_runtime": 57.819,
551
+ "eval_samples_per_second": 7.887,
552
+ "eval_steps_per_second": 0.986,
553
+ "eval_wer": 0.4003322259136213,
554
+ "step": 7000
555
+ },
556
+ {
557
+ "epoch": 10.22,
558
+ "learning_rate": 6.333664122137404e-05,
559
+ "loss": 1.5122,
560
+ "step": 7100
561
+ },
562
+ {
563
+ "epoch": 10.36,
564
+ "learning_rate": 6.310763358778626e-05,
565
+ "loss": 1.4812,
566
+ "step": 7200
567
+ },
568
+ {
569
+ "epoch": 10.5,
570
+ "learning_rate": 6.287862595419847e-05,
571
+ "loss": 1.4997,
572
+ "step": 7300
573
+ },
574
+ {
575
+ "epoch": 10.65,
576
+ "learning_rate": 6.264961832061068e-05,
577
+ "loss": 1.4933,
578
+ "step": 7400
579
+ },
580
+ {
581
+ "epoch": 10.79,
582
+ "learning_rate": 6.24206106870229e-05,
583
+ "loss": 1.5044,
584
+ "step": 7500
585
+ },
586
+ {
587
+ "epoch": 10.79,
588
+ "eval_loss": 0.3689935803413391,
589
+ "eval_runtime": 56.6639,
590
+ "eval_samples_per_second": 8.047,
591
+ "eval_steps_per_second": 1.006,
592
+ "eval_wer": 0.3893083660525521,
593
+ "step": 7500
594
+ },
595
+ {
596
+ "epoch": 10.93,
597
+ "learning_rate": 6.219160305343511e-05,
598
+ "loss": 1.475,
599
+ "step": 7600
600
+ },
601
+ {
602
+ "epoch": 11.08,
603
+ "learning_rate": 6.196259541984733e-05,
604
+ "loss": 1.4826,
605
+ "step": 7700
606
+ },
607
+ {
608
+ "epoch": 11.22,
609
+ "learning_rate": 6.173358778625954e-05,
610
+ "loss": 1.4336,
611
+ "step": 7800
612
+ },
613
+ {
614
+ "epoch": 11.37,
615
+ "learning_rate": 6.150458015267175e-05,
616
+ "loss": 1.4338,
617
+ "step": 7900
618
+ },
619
+ {
620
+ "epoch": 11.51,
621
+ "learning_rate": 6.127557251908397e-05,
622
+ "loss": 1.4563,
623
+ "step": 8000
624
+ },
625
+ {
626
+ "epoch": 11.51,
627
+ "eval_loss": 0.37522608041763306,
628
+ "eval_runtime": 56.3933,
629
+ "eval_samples_per_second": 8.086,
630
+ "eval_steps_per_second": 1.011,
631
+ "eval_wer": 0.387496224705527,
632
+ "step": 8000
633
+ },
634
+ {
635
+ "epoch": 11.65,
636
+ "learning_rate": 6.104656488549618e-05,
637
+ "loss": 1.45,
638
+ "step": 8100
639
+ },
640
+ {
641
+ "epoch": 11.8,
642
+ "learning_rate": 6.0817557251908386e-05,
643
+ "loss": 1.4476,
644
+ "step": 8200
645
+ },
646
+ {
647
+ "epoch": 11.94,
648
+ "learning_rate": 6.0588549618320606e-05,
649
+ "loss": 1.4255,
650
+ "step": 8300
651
+ },
652
+ {
653
+ "epoch": 12.09,
654
+ "learning_rate": 6.035954198473282e-05,
655
+ "loss": 1.4266,
656
+ "step": 8400
657
+ },
658
+ {
659
+ "epoch": 12.23,
660
+ "learning_rate": 6.013053435114503e-05,
661
+ "loss": 1.394,
662
+ "step": 8500
663
+ },
664
+ {
665
+ "epoch": 12.23,
666
+ "eval_loss": 0.3385707437992096,
667
+ "eval_runtime": 56.4365,
668
+ "eval_samples_per_second": 8.08,
669
+ "eval_steps_per_second": 1.01,
670
+ "eval_wer": 0.35668982180610087,
671
+ "step": 8500
672
+ },
673
+ {
674
+ "epoch": 12.37,
675
+ "learning_rate": 5.9901526717557246e-05,
676
+ "loss": 1.4002,
677
+ "step": 8600
678
+ },
679
+ {
680
+ "epoch": 12.52,
681
+ "learning_rate": 5.9672519083969466e-05,
682
+ "loss": 1.3823,
683
+ "step": 8700
684
+ },
685
+ {
686
+ "epoch": 12.66,
687
+ "learning_rate": 5.944351145038167e-05,
688
+ "loss": 1.3834,
689
+ "step": 8800
690
+ },
691
+ {
692
+ "epoch": 12.8,
693
+ "learning_rate": 5.9214503816793886e-05,
694
+ "loss": 1.3765,
695
+ "step": 8900
696
+ },
697
+ {
698
+ "epoch": 12.95,
699
+ "learning_rate": 5.89854961832061e-05,
700
+ "loss": 1.3641,
701
+ "step": 9000
702
+ },
703
+ {
704
+ "epoch": 12.95,
705
+ "eval_loss": 0.3289976716041565,
706
+ "eval_runtime": 57.6648,
707
+ "eval_samples_per_second": 7.908,
708
+ "eval_steps_per_second": 0.988,
709
+ "eval_wer": 0.346723044397463,
710
+ "step": 9000
711
+ },
712
+ {
713
+ "epoch": 13.09,
714
+ "learning_rate": 5.875648854961832e-05,
715
+ "loss": 1.3599,
716
+ "step": 9100
717
+ },
718
+ {
719
+ "epoch": 13.24,
720
+ "learning_rate": 5.8527480916030534e-05,
721
+ "loss": 1.3234,
722
+ "step": 9200
723
+ },
724
+ {
725
+ "epoch": 13.38,
726
+ "learning_rate": 5.829847328244274e-05,
727
+ "loss": 1.3323,
728
+ "step": 9300
729
+ },
730
+ {
731
+ "epoch": 13.52,
732
+ "learning_rate": 5.8069465648854954e-05,
733
+ "loss": 1.3002,
734
+ "step": 9400
735
+ },
736
+ {
737
+ "epoch": 13.67,
738
+ "learning_rate": 5.7840458015267174e-05,
739
+ "loss": 1.2878,
740
+ "step": 9500
741
+ },
742
+ {
743
+ "epoch": 13.67,
744
+ "eval_loss": 0.28934037685394287,
745
+ "eval_runtime": 56.1939,
746
+ "eval_samples_per_second": 8.115,
747
+ "eval_steps_per_second": 1.014,
748
+ "eval_wer": 0.31350045303533675,
749
+ "step": 9500
750
+ },
751
+ {
752
+ "epoch": 13.81,
753
+ "learning_rate": 5.761145038167939e-05,
754
+ "loss": 1.2958,
755
+ "step": 9600
756
+ },
757
+ {
758
+ "epoch": 13.96,
759
+ "learning_rate": 5.73824427480916e-05,
760
+ "loss": 1.293,
761
+ "step": 9700
762
+ },
763
+ {
764
+ "epoch": 14.1,
765
+ "learning_rate": 5.715343511450381e-05,
766
+ "loss": 1.286,
767
+ "step": 9800
768
+ },
769
+ {
770
+ "epoch": 14.24,
771
+ "learning_rate": 5.692442748091603e-05,
772
+ "loss": 1.2912,
773
+ "step": 9900
774
+ },
775
+ {
776
+ "epoch": 14.39,
777
+ "learning_rate": 5.669541984732824e-05,
778
+ "loss": 1.2602,
779
+ "step": 10000
780
+ },
781
+ {
782
+ "epoch": 14.39,
783
+ "eval_loss": 0.2723310589790344,
784
+ "eval_runtime": 57.7364,
785
+ "eval_samples_per_second": 7.898,
786
+ "eval_steps_per_second": 0.987,
787
+ "eval_wer": 0.30292962851102384,
788
+ "step": 10000
789
+ },
790
+ {
791
+ "epoch": 14.53,
792
+ "learning_rate": 5.6466412213740455e-05,
793
+ "loss": 1.2386,
794
+ "step": 10100
795
+ },
796
+ {
797
+ "epoch": 14.68,
798
+ "learning_rate": 5.623740458015266e-05,
799
+ "loss": 1.247,
800
+ "step": 10200
801
+ },
802
+ {
803
+ "epoch": 14.82,
804
+ "learning_rate": 5.600839694656488e-05,
805
+ "loss": 1.2611,
806
+ "step": 10300
807
+ },
808
+ {
809
+ "epoch": 14.96,
810
+ "learning_rate": 5.5779389312977095e-05,
811
+ "loss": 1.2238,
812
+ "step": 10400
813
+ },
814
+ {
815
+ "epoch": 15.11,
816
+ "learning_rate": 5.555038167938931e-05,
817
+ "loss": 1.2302,
818
+ "step": 10500
819
+ },
820
+ {
821
+ "epoch": 15.11,
822
+ "eval_loss": 0.260337769985199,
823
+ "eval_runtime": 56.8447,
824
+ "eval_samples_per_second": 8.022,
825
+ "eval_steps_per_second": 1.003,
826
+ "eval_wer": 0.29885231048021743,
827
+ "step": 10500
828
+ },
829
+ {
830
+ "epoch": 15.25,
831
+ "learning_rate": 5.532137404580152e-05,
832
+ "loss": 1.2119,
833
+ "step": 10600
834
+ },
835
+ {
836
+ "epoch": 15.4,
837
+ "learning_rate": 5.509236641221373e-05,
838
+ "loss": 1.205,
839
+ "step": 10700
840
+ },
841
+ {
842
+ "epoch": 15.54,
843
+ "learning_rate": 5.486335877862595e-05,
844
+ "loss": 1.1929,
845
+ "step": 10800
846
+ },
847
+ {
848
+ "epoch": 15.68,
849
+ "learning_rate": 5.463435114503816e-05,
850
+ "loss": 1.1917,
851
+ "step": 10900
852
+ },
853
+ {
854
+ "epoch": 15.83,
855
+ "learning_rate": 5.4405343511450376e-05,
856
+ "loss": 1.1865,
857
+ "step": 11000
858
+ },
859
+ {
860
+ "epoch": 15.83,
861
+ "eval_loss": 0.24401792883872986,
862
+ "eval_runtime": 56.3786,
863
+ "eval_samples_per_second": 8.088,
864
+ "eval_steps_per_second": 1.011,
865
+ "eval_wer": 0.279371790999698,
866
+ "step": 11000
867
+ },
868
+ {
869
+ "epoch": 15.97,
870
+ "learning_rate": 5.417633587786259e-05,
871
+ "loss": 1.1634,
872
+ "step": 11100
873
+ },
874
+ {
875
+ "epoch": 16.11,
876
+ "learning_rate": 5.394732824427481e-05,
877
+ "loss": 1.162,
878
+ "step": 11200
879
+ },
880
+ {
881
+ "epoch": 16.26,
882
+ "learning_rate": 5.3718320610687016e-05,
883
+ "loss": 1.1556,
884
+ "step": 11300
885
+ },
886
+ {
887
+ "epoch": 16.4,
888
+ "learning_rate": 5.348931297709923e-05,
889
+ "loss": 1.1432,
890
+ "step": 11400
891
+ },
892
+ {
893
+ "epoch": 16.55,
894
+ "learning_rate": 5.326030534351144e-05,
895
+ "loss": 1.1491,
896
+ "step": 11500
897
+ },
898
+ {
899
+ "epoch": 16.55,
900
+ "eval_loss": 0.24998041987419128,
901
+ "eval_runtime": 57.3291,
902
+ "eval_samples_per_second": 7.954,
903
+ "eval_steps_per_second": 0.994,
904
+ "eval_wer": 0.278767743884023,
905
+ "step": 11500
906
+ },
907
+ {
908
+ "epoch": 16.69,
909
+ "learning_rate": 5.303129770992366e-05,
910
+ "loss": 1.1351,
911
+ "step": 11600
912
+ },
913
+ {
914
+ "epoch": 16.83,
915
+ "learning_rate": 5.280458015267176e-05,
916
+ "loss": 1.1448,
917
+ "step": 11700
918
+ },
919
+ {
920
+ "epoch": 16.98,
921
+ "learning_rate": 5.2575572519083964e-05,
922
+ "loss": 1.123,
923
+ "step": 11800
924
+ },
925
+ {
926
+ "epoch": 17.12,
927
+ "learning_rate": 5.234656488549618e-05,
928
+ "loss": 1.1066,
929
+ "step": 11900
930
+ },
931
+ {
932
+ "epoch": 17.27,
933
+ "learning_rate": 5.211755725190839e-05,
934
+ "loss": 1.093,
935
+ "step": 12000
936
+ },
937
+ {
938
+ "epoch": 17.27,
939
+ "eval_loss": 0.22785192728042603,
940
+ "eval_runtime": 57.123,
941
+ "eval_samples_per_second": 7.983,
942
+ "eval_steps_per_second": 0.998,
943
+ "eval_wer": 0.2629115070975536,
944
+ "step": 12000
945
+ },
946
+ {
947
+ "epoch": 17.41,
948
+ "learning_rate": 5.188854961832061e-05,
949
+ "loss": 1.115,
950
+ "step": 12100
951
+ },
952
+ {
953
+ "epoch": 17.55,
954
+ "learning_rate": 5.1659541984732825e-05,
955
+ "loss": 1.0895,
956
+ "step": 12200
957
+ },
958
+ {
959
+ "epoch": 17.7,
960
+ "learning_rate": 5.143053435114503e-05,
961
+ "loss": 1.0705,
962
+ "step": 12300
963
+ },
964
+ {
965
+ "epoch": 17.84,
966
+ "learning_rate": 5.1201526717557245e-05,
967
+ "loss": 1.07,
968
+ "step": 12400
969
+ },
970
+ {
971
+ "epoch": 17.98,
972
+ "learning_rate": 5.0972519083969465e-05,
973
+ "loss": 1.0367,
974
+ "step": 12500
975
+ },
976
+ {
977
+ "epoch": 17.98,
978
+ "eval_loss": 0.2076062262058258,
979
+ "eval_runtime": 56.1861,
980
+ "eval_samples_per_second": 8.116,
981
+ "eval_steps_per_second": 1.014,
982
+ "eval_wer": 0.24433705829054667,
983
+ "step": 12500
984
+ },
985
+ {
986
+ "epoch": 18.13,
987
+ "learning_rate": 5.074351145038168e-05,
988
+ "loss": 1.0313,
989
+ "step": 12600
990
+ },
991
+ {
992
+ "epoch": 18.27,
993
+ "learning_rate": 5.051450381679389e-05,
994
+ "loss": 1.0236,
995
+ "step": 12700
996
+ },
997
+ {
998
+ "epoch": 18.42,
999
+ "learning_rate": 5.02854961832061e-05,
1000
+ "loss": 1.0035,
1001
+ "step": 12800
1002
+ },
1003
+ {
1004
+ "epoch": 18.56,
1005
+ "learning_rate": 5.005648854961831e-05,
1006
+ "loss": 1.0105,
1007
+ "step": 12900
1008
+ },
1009
+ {
1010
+ "epoch": 18.7,
1011
+ "learning_rate": 4.982748091603053e-05,
1012
+ "loss": 0.9954,
1013
+ "step": 13000
1014
+ },
1015
+ {
1016
+ "epoch": 18.7,
1017
+ "eval_loss": 0.18436060845851898,
1018
+ "eval_runtime": 58.6813,
1019
+ "eval_samples_per_second": 7.771,
1020
+ "eval_steps_per_second": 0.971,
1021
+ "eval_wer": 0.22591362126245848,
1022
+ "step": 13000
1023
+ },
1024
+ {
1025
+ "epoch": 18.85,
1026
+ "learning_rate": 4.9598473282442746e-05,
1027
+ "loss": 0.9883,
1028
+ "step": 13100
1029
+ },
1030
+ {
1031
+ "epoch": 18.99,
1032
+ "learning_rate": 4.936946564885495e-05,
1033
+ "loss": 0.9908,
1034
+ "step": 13200
1035
+ },
1036
+ {
1037
+ "epoch": 19.14,
1038
+ "learning_rate": 4.9140458015267166e-05,
1039
+ "loss": 0.9634,
1040
+ "step": 13300
1041
+ },
1042
+ {
1043
+ "epoch": 19.28,
1044
+ "learning_rate": 4.8911450381679386e-05,
1045
+ "loss": 0.9628,
1046
+ "step": 13400
1047
+ },
1048
+ {
1049
+ "epoch": 19.42,
1050
+ "learning_rate": 4.86824427480916e-05,
1051
+ "loss": 0.99,
1052
+ "step": 13500
1053
+ },
1054
+ {
1055
+ "epoch": 19.42,
1056
+ "eval_loss": 0.17937178909778595,
1057
+ "eval_runtime": 57.9354,
1058
+ "eval_samples_per_second": 7.871,
1059
+ "eval_steps_per_second": 0.984,
1060
+ "eval_wer": 0.21790999697976443,
1061
+ "step": 13500
1062
+ },
1063
+ {
1064
+ "epoch": 19.57,
1065
+ "learning_rate": 4.845343511450381e-05,
1066
+ "loss": 0.9627,
1067
+ "step": 13600
1068
+ },
1069
+ {
1070
+ "epoch": 19.71,
1071
+ "learning_rate": 4.822442748091602e-05,
1072
+ "loss": 0.969,
1073
+ "step": 13700
1074
+ },
1075
+ {
1076
+ "epoch": 19.86,
1077
+ "learning_rate": 4.799541984732824e-05,
1078
+ "loss": 0.9507,
1079
+ "step": 13800
1080
+ },
1081
+ {
1082
+ "epoch": 20.0,
1083
+ "learning_rate": 4.776641221374045e-05,
1084
+ "loss": 0.9696,
1085
+ "step": 13900
1086
+ },
1087
+ {
1088
+ "epoch": 20.14,
1089
+ "learning_rate": 4.753740458015267e-05,
1090
+ "loss": 0.9385,
1091
+ "step": 14000
1092
+ },
1093
+ {
1094
+ "epoch": 20.14,
1095
+ "eval_loss": 0.1765013188123703,
1096
+ "eval_runtime": 56.2969,
1097
+ "eval_samples_per_second": 8.1,
1098
+ "eval_steps_per_second": 1.012,
1099
+ "eval_wer": 0.2121715493808517,
1100
+ "step": 14000
1101
+ },
1102
+ {
1103
+ "epoch": 20.29,
1104
+ "learning_rate": 4.730839694656488e-05,
1105
+ "loss": 0.9264,
1106
+ "step": 14100
1107
+ },
1108
+ {
1109
+ "epoch": 20.43,
1110
+ "learning_rate": 4.70793893129771e-05,
1111
+ "loss": 0.9399,
1112
+ "step": 14200
1113
+ },
1114
+ {
1115
+ "epoch": 20.57,
1116
+ "learning_rate": 4.685038167938931e-05,
1117
+ "loss": 0.923,
1118
+ "step": 14300
1119
+ },
1120
+ {
1121
+ "epoch": 20.72,
1122
+ "learning_rate": 4.662137404580152e-05,
1123
+ "loss": 0.9128,
1124
+ "step": 14400
1125
+ },
1126
+ {
1127
+ "epoch": 20.86,
1128
+ "learning_rate": 4.6392366412213734e-05,
1129
+ "loss": 0.8952,
1130
+ "step": 14500
1131
+ },
1132
+ {
1133
+ "epoch": 20.86,
1134
+ "eval_loss": 0.17056496441364288,
1135
+ "eval_runtime": 56.9013,
1136
+ "eval_samples_per_second": 8.014,
1137
+ "eval_steps_per_second": 1.002,
1138
+ "eval_wer": 0.19737239504681364,
1139
+ "step": 14500
1140
+ },
1141
+ {
1142
+ "epoch": 21.01,
1143
+ "learning_rate": 4.6163358778625954e-05,
1144
+ "loss": 0.9509,
1145
+ "step": 14600
1146
+ },
1147
+ {
1148
+ "epoch": 21.15,
1149
+ "learning_rate": 4.593435114503817e-05,
1150
+ "loss": 0.8967,
1151
+ "step": 14700
1152
+ },
1153
+ {
1154
+ "epoch": 21.29,
1155
+ "learning_rate": 4.5705343511450374e-05,
1156
+ "loss": 0.8928,
1157
+ "step": 14800
1158
+ },
1159
+ {
1160
+ "epoch": 21.44,
1161
+ "learning_rate": 4.547633587786259e-05,
1162
+ "loss": 0.8731,
1163
+ "step": 14900
1164
+ },
1165
+ {
1166
+ "epoch": 21.58,
1167
+ "learning_rate": 4.524732824427481e-05,
1168
+ "loss": 0.8841,
1169
+ "step": 15000
1170
+ },
1171
+ {
1172
+ "epoch": 21.58,
1173
+ "eval_loss": 0.17911894619464874,
1174
+ "eval_runtime": 56.4012,
1175
+ "eval_samples_per_second": 8.085,
1176
+ "eval_steps_per_second": 1.011,
1177
+ "eval_wer": 0.1969193597100574,
1178
+ "step": 15000
1179
+ },
1180
+ {
1181
+ "epoch": 21.73,
1182
+ "learning_rate": 4.501832061068702e-05,
1183
+ "loss": 0.8816,
1184
+ "step": 15100
1185
+ },
1186
+ {
1187
+ "epoch": 21.87,
1188
+ "learning_rate": 4.4789312977099235e-05,
1189
+ "loss": 0.8801,
1190
+ "step": 15200
1191
+ },
1192
+ {
1193
+ "epoch": 22.01,
1194
+ "learning_rate": 4.456030534351144e-05,
1195
+ "loss": 0.9044,
1196
+ "step": 15300
1197
+ },
1198
+ {
1199
+ "epoch": 22.16,
1200
+ "learning_rate": 4.433129770992366e-05,
1201
+ "loss": 0.8849,
1202
+ "step": 15400
1203
+ },
1204
+ {
1205
+ "epoch": 22.3,
1206
+ "learning_rate": 4.4102290076335875e-05,
1207
+ "loss": 0.847,
1208
+ "step": 15500
1209
+ },
1210
+ {
1211
+ "epoch": 22.3,
1212
+ "eval_loss": 0.17799803614616394,
1213
+ "eval_runtime": 57.0716,
1214
+ "eval_samples_per_second": 7.99,
1215
+ "eval_steps_per_second": 0.999,
1216
+ "eval_wer": 0.2059800664451827,
1217
+ "step": 15500
1218
+ },
1219
+ {
1220
+ "epoch": 22.45,
1221
+ "learning_rate": 4.387328244274809e-05,
1222
+ "loss": 0.8805,
1223
+ "step": 15600
1224
+ },
1225
+ {
1226
+ "epoch": 22.59,
1227
+ "learning_rate": 4.3644274809160295e-05,
1228
+ "loss": 0.8516,
1229
+ "step": 15700
1230
+ },
1231
+ {
1232
+ "epoch": 22.73,
1233
+ "learning_rate": 4.341984732824427e-05,
1234
+ "loss": 0.8137,
1235
+ "step": 15800
1236
+ },
1237
+ {
1238
+ "epoch": 22.88,
1239
+ "learning_rate": 4.3190839694656484e-05,
1240
+ "loss": 0.8335,
1241
+ "step": 15900
1242
+ },
1243
+ {
1244
+ "epoch": 23.02,
1245
+ "learning_rate": 4.29618320610687e-05,
1246
+ "loss": 0.8669,
1247
+ "step": 16000
1248
+ },
1249
+ {
1250
+ "epoch": 23.02,
1251
+ "eval_loss": 0.16084039211273193,
1252
+ "eval_runtime": 58.1201,
1253
+ "eval_samples_per_second": 7.846,
1254
+ "eval_steps_per_second": 0.981,
1255
+ "eval_wer": 0.18619752340682574,
1256
+ "step": 16000
1257
+ },
1258
+ {
1259
+ "epoch": 23.17,
1260
+ "learning_rate": 4.273282442748092e-05,
1261
+ "loss": 0.8242,
1262
+ "step": 16100
1263
+ },
1264
+ {
1265
+ "epoch": 23.31,
1266
+ "learning_rate": 4.250381679389313e-05,
1267
+ "loss": 0.8384,
1268
+ "step": 16200
1269
+ },
1270
+ {
1271
+ "epoch": 23.45,
1272
+ "learning_rate": 4.227480916030534e-05,
1273
+ "loss": 0.8376,
1274
+ "step": 16300
1275
+ },
1276
+ {
1277
+ "epoch": 23.6,
1278
+ "learning_rate": 4.204809160305343e-05,
1279
+ "loss": 0.8277,
1280
+ "step": 16400
1281
+ },
1282
+ {
1283
+ "epoch": 23.74,
1284
+ "learning_rate": 4.1819083969465645e-05,
1285
+ "loss": 0.8066,
1286
+ "step": 16500
1287
+ },
1288
+ {
1289
+ "epoch": 23.74,
1290
+ "eval_loss": 0.14472883939743042,
1291
+ "eval_runtime": 58.2049,
1292
+ "eval_samples_per_second": 7.834,
1293
+ "eval_steps_per_second": 0.979,
1294
+ "eval_wer": 0.16263968589549985,
1295
+ "step": 16500
1296
+ },
1297
+ {
1298
+ "epoch": 23.88,
1299
+ "learning_rate": 4.159007633587786e-05,
1300
+ "loss": 0.8056,
1301
+ "step": 16600
1302
+ },
1303
+ {
1304
+ "epoch": 24.03,
1305
+ "learning_rate": 4.136106870229008e-05,
1306
+ "loss": 0.8128,
1307
+ "step": 16700
1308
+ },
1309
+ {
1310
+ "epoch": 24.17,
1311
+ "learning_rate": 4.1132061068702285e-05,
1312
+ "loss": 0.8095,
1313
+ "step": 16800
1314
+ },
1315
+ {
1316
+ "epoch": 24.32,
1317
+ "learning_rate": 4.09030534351145e-05,
1318
+ "loss": 0.7816,
1319
+ "step": 16900
1320
+ },
1321
+ {
1322
+ "epoch": 24.46,
1323
+ "learning_rate": 4.067404580152671e-05,
1324
+ "loss": 0.7908,
1325
+ "step": 17000
1326
+ },
1327
+ {
1328
+ "epoch": 24.46,
1329
+ "eval_loss": 0.14570841193199158,
1330
+ "eval_runtime": 56.8345,
1331
+ "eval_samples_per_second": 8.023,
1332
+ "eval_steps_per_second": 1.003,
1333
+ "eval_wer": 0.1655089096949562,
1334
+ "step": 17000
1335
+ },
1336
+ {
1337
+ "epoch": 24.6,
1338
+ "learning_rate": 4.044503816793893e-05,
1339
+ "loss": 0.7846,
1340
+ "step": 17100
1341
+ },
1342
+ {
1343
+ "epoch": 24.75,
1344
+ "learning_rate": 4.0216030534351146e-05,
1345
+ "loss": 0.7735,
1346
+ "step": 17200
1347
+ },
1348
+ {
1349
+ "epoch": 24.89,
1350
+ "learning_rate": 3.998702290076335e-05,
1351
+ "loss": 0.772,
1352
+ "step": 17300
1353
+ },
1354
+ {
1355
+ "epoch": 25.04,
1356
+ "learning_rate": 3.9758015267175566e-05,
1357
+ "loss": 0.7676,
1358
+ "step": 17400
1359
+ },
1360
+ {
1361
+ "epoch": 25.18,
1362
+ "learning_rate": 3.9529007633587786e-05,
1363
+ "loss": 0.7459,
1364
+ "step": 17500
1365
+ },
1366
+ {
1367
+ "epoch": 25.18,
1368
+ "eval_loss": 0.13501976430416107,
1369
+ "eval_runtime": 56.9549,
1370
+ "eval_samples_per_second": 8.006,
1371
+ "eval_steps_per_second": 1.001,
1372
+ "eval_wer": 0.14451827242524917,
1373
+ "step": 17500
1374
+ },
1375
+ {
1376
+ "epoch": 25.32,
1377
+ "learning_rate": 3.93e-05,
1378
+ "loss": 0.7394,
1379
+ "step": 17600
1380
+ },
1381
+ {
1382
+ "epoch": 25.47,
1383
+ "learning_rate": 3.9070992366412206e-05,
1384
+ "loss": 0.727,
1385
+ "step": 17700
1386
+ },
1387
+ {
1388
+ "epoch": 25.61,
1389
+ "learning_rate": 3.884198473282442e-05,
1390
+ "loss": 0.7255,
1391
+ "step": 17800
1392
+ },
1393
+ {
1394
+ "epoch": 25.75,
1395
+ "learning_rate": 3.861297709923664e-05,
1396
+ "loss": 0.7122,
1397
+ "step": 17900
1398
+ },
1399
+ {
1400
+ "epoch": 25.9,
1401
+ "learning_rate": 3.8383969465648854e-05,
1402
+ "loss": 0.7218,
1403
+ "step": 18000
1404
+ },
1405
+ {
1406
+ "epoch": 25.9,
1407
+ "eval_loss": 0.12762019038200378,
1408
+ "eval_runtime": 56.702,
1409
+ "eval_samples_per_second": 8.042,
1410
+ "eval_steps_per_second": 1.005,
1411
+ "eval_wer": 0.1421020839625491,
1412
+ "step": 18000
1413
+ },
1414
+ {
1415
+ "epoch": 26.04,
1416
+ "learning_rate": 3.815496183206107e-05,
1417
+ "loss": 0.7219,
1418
+ "step": 18100
1419
+ },
1420
+ {
1421
+ "epoch": 26.19,
1422
+ "learning_rate": 3.7925954198473274e-05,
1423
+ "loss": 0.6954,
1424
+ "step": 18200
1425
+ },
1426
+ {
1427
+ "epoch": 26.33,
1428
+ "learning_rate": 3.7696946564885494e-05,
1429
+ "loss": 0.6874,
1430
+ "step": 18300
1431
+ },
1432
+ {
1433
+ "epoch": 26.47,
1434
+ "learning_rate": 3.746793893129771e-05,
1435
+ "loss": 0.6974,
1436
+ "step": 18400
1437
+ },
1438
+ {
1439
+ "epoch": 26.62,
1440
+ "learning_rate": 3.7241221374045795e-05,
1441
+ "loss": 0.703,
1442
+ "step": 18500
1443
+ },
1444
+ {
1445
+ "epoch": 26.62,
1446
+ "eval_loss": 0.11774259060621262,
1447
+ "eval_runtime": 58.7528,
1448
+ "eval_samples_per_second": 7.761,
1449
+ "eval_steps_per_second": 0.97,
1450
+ "eval_wer": 0.13017215342796737,
1451
+ "step": 18500
1452
+ },
1453
+ {
1454
+ "epoch": 26.76,
1455
+ "learning_rate": 3.7012213740458015e-05,
1456
+ "loss": 0.6976,
1457
+ "step": 18600
1458
+ },
1459
+ {
1460
+ "epoch": 26.91,
1461
+ "learning_rate": 3.678320610687022e-05,
1462
+ "loss": 0.7008,
1463
+ "step": 18700
1464
+ },
1465
+ {
1466
+ "epoch": 27.05,
1467
+ "learning_rate": 3.655419847328244e-05,
1468
+ "loss": 0.6815,
1469
+ "step": 18800
1470
+ },
1471
+ {
1472
+ "epoch": 27.19,
1473
+ "learning_rate": 3.6325190839694655e-05,
1474
+ "loss": 0.6783,
1475
+ "step": 18900
1476
+ },
1477
+ {
1478
+ "epoch": 27.34,
1479
+ "learning_rate": 3.609618320610687e-05,
1480
+ "loss": 0.685,
1481
+ "step": 19000
1482
+ },
1483
+ {
1484
+ "epoch": 27.34,
1485
+ "eval_loss": 0.11473936587572098,
1486
+ "eval_runtime": 56.5545,
1487
+ "eval_samples_per_second": 8.063,
1488
+ "eval_steps_per_second": 1.008,
1489
+ "eval_wer": 0.13047417698580488,
1490
+ "step": 19000
1491
+ },
1492
+ {
1493
+ "epoch": 27.48,
1494
+ "learning_rate": 3.586717557251908e-05,
1495
+ "loss": 0.6828,
1496
+ "step": 19100
1497
+ },
1498
+ {
1499
+ "epoch": 27.63,
1500
+ "learning_rate": 3.5638167938931296e-05,
1501
+ "loss": 0.6899,
1502
+ "step": 19200
1503
+ },
1504
+ {
1505
+ "epoch": 27.77,
1506
+ "learning_rate": 3.540916030534351e-05,
1507
+ "loss": 0.6889,
1508
+ "step": 19300
1509
+ },
1510
+ {
1511
+ "epoch": 27.91,
1512
+ "learning_rate": 3.518015267175572e-05,
1513
+ "loss": 0.689,
1514
+ "step": 19400
1515
+ },
1516
+ {
1517
+ "epoch": 28.06,
1518
+ "learning_rate": 3.4951145038167936e-05,
1519
+ "loss": 0.6811,
1520
+ "step": 19500
1521
+ },
1522
+ {
1523
+ "epoch": 28.06,
1524
+ "eval_loss": 0.11280036717653275,
1525
+ "eval_runtime": 57.3287,
1526
+ "eval_samples_per_second": 7.954,
1527
+ "eval_steps_per_second": 0.994,
1528
+ "eval_wer": 0.12443370582905466,
1529
+ "step": 19500
1530
+ },
1531
+ {
1532
+ "epoch": 28.2,
1533
+ "learning_rate": 3.472213740458015e-05,
1534
+ "loss": 0.6599,
1535
+ "step": 19600
1536
+ },
1537
+ {
1538
+ "epoch": 28.34,
1539
+ "learning_rate": 3.449312977099236e-05,
1540
+ "loss": 0.655,
1541
+ "step": 19700
1542
+ },
1543
+ {
1544
+ "epoch": 28.49,
1545
+ "learning_rate": 3.4264122137404576e-05,
1546
+ "loss": 0.6577,
1547
+ "step": 19800
1548
+ },
1549
+ {
1550
+ "epoch": 28.63,
1551
+ "learning_rate": 3.403511450381679e-05,
1552
+ "loss": 0.6562,
1553
+ "step": 19900
1554
+ },
1555
+ {
1556
+ "epoch": 28.78,
1557
+ "learning_rate": 3.3806106870229e-05,
1558
+ "loss": 0.6444,
1559
+ "step": 20000
1560
+ },
1561
+ {
1562
+ "epoch": 28.78,
1563
+ "eval_loss": 0.11200590431690216,
1564
+ "eval_runtime": 56.8504,
1565
+ "eval_samples_per_second": 8.021,
1566
+ "eval_steps_per_second": 1.003,
1567
+ "eval_wer": 0.1212624584717608,
1568
+ "step": 20000
1569
+ },
1570
+ {
1571
+ "epoch": 28.92,
1572
+ "learning_rate": 3.357709923664122e-05,
1573
+ "loss": 0.6543,
1574
+ "step": 20100
1575
+ },
1576
+ {
1577
+ "epoch": 29.06,
1578
+ "learning_rate": 3.334809160305344e-05,
1579
+ "loss": 0.6337,
1580
+ "step": 20200
1581
+ },
1582
+ {
1583
+ "epoch": 29.21,
1584
+ "learning_rate": 3.3119083969465644e-05,
1585
+ "loss": 0.613,
1586
+ "step": 20300
1587
+ },
1588
+ {
1589
+ "epoch": 29.35,
1590
+ "learning_rate": 3.2890076335877864e-05,
1591
+ "loss": 0.6197,
1592
+ "step": 20400
1593
+ },
1594
+ {
1595
+ "epoch": 29.5,
1596
+ "learning_rate": 3.266106870229007e-05,
1597
+ "loss": 0.6323,
1598
+ "step": 20500
1599
+ },
1600
+ {
1601
+ "epoch": 29.5,
1602
+ "eval_loss": 0.11374162137508392,
1603
+ "eval_runtime": 56.3556,
1604
+ "eval_samples_per_second": 8.091,
1605
+ "eval_steps_per_second": 1.011,
1606
+ "eval_wer": 0.11658109332527937,
1607
+ "step": 20500
1608
+ },
1609
+ {
1610
+ "epoch": 29.64,
1611
+ "learning_rate": 3.243206106870229e-05,
1612
+ "loss": 0.6182,
1613
+ "step": 20600
1614
+ },
1615
+ {
1616
+ "epoch": 29.78,
1617
+ "learning_rate": 3.2203053435114504e-05,
1618
+ "loss": 0.6298,
1619
+ "step": 20700
1620
+ },
1621
+ {
1622
+ "epoch": 29.93,
1623
+ "learning_rate": 3.197404580152672e-05,
1624
+ "loss": 0.6143,
1625
+ "step": 20800
1626
+ },
1627
+ {
1628
+ "epoch": 30.07,
1629
+ "learning_rate": 3.174503816793893e-05,
1630
+ "loss": 0.5943,
1631
+ "step": 20900
1632
+ },
1633
+ {
1634
+ "epoch": 30.22,
1635
+ "learning_rate": 3.1516030534351145e-05,
1636
+ "loss": 0.5998,
1637
+ "step": 21000
1638
+ },
1639
+ {
1640
+ "epoch": 30.22,
1641
+ "eval_loss": 0.10512539744377136,
1642
+ "eval_runtime": 56.712,
1643
+ "eval_samples_per_second": 8.041,
1644
+ "eval_steps_per_second": 1.005,
1645
+ "eval_wer": 0.1106916339474479,
1646
+ "step": 21000
1647
+ },
1648
+ {
1649
+ "epoch": 30.36,
1650
+ "learning_rate": 3.128702290076336e-05,
1651
+ "loss": 0.6093,
1652
+ "step": 21100
1653
+ },
1654
+ {
1655
+ "epoch": 30.5,
1656
+ "learning_rate": 3.1058015267175565e-05,
1657
+ "loss": 0.5948,
1658
+ "step": 21200
1659
+ },
1660
+ {
1661
+ "epoch": 30.65,
1662
+ "learning_rate": 3.0829007633587785e-05,
1663
+ "loss": 0.5847,
1664
+ "step": 21300
1665
+ },
1666
+ {
1667
+ "epoch": 30.79,
1668
+ "learning_rate": 3.06e-05,
1669
+ "loss": 0.5768,
1670
+ "step": 21400
1671
+ },
1672
+ {
1673
+ "epoch": 30.93,
1674
+ "learning_rate": 3.0370992366412212e-05,
1675
+ "loss": 0.5706,
1676
+ "step": 21500
1677
+ },
1678
+ {
1679
+ "epoch": 30.93,
1680
+ "eval_loss": 0.1035308688879013,
1681
+ "eval_runtime": 56.4523,
1682
+ "eval_samples_per_second": 8.078,
1683
+ "eval_steps_per_second": 1.01,
1684
+ "eval_wer": 0.10374509211718513,
1685
+ "step": 21500
1686
+ },
1687
+ {
1688
+ "epoch": 31.08,
1689
+ "learning_rate": 3.0141984732824422e-05,
1690
+ "loss": 0.5819,
1691
+ "step": 21600
1692
+ },
1693
+ {
1694
+ "epoch": 31.22,
1695
+ "learning_rate": 2.991297709923664e-05,
1696
+ "loss": 0.5529,
1697
+ "step": 21700
1698
+ },
1699
+ {
1700
+ "epoch": 31.37,
1701
+ "learning_rate": 2.9683969465648852e-05,
1702
+ "loss": 0.5691,
1703
+ "step": 21800
1704
+ },
1705
+ {
1706
+ "epoch": 31.51,
1707
+ "learning_rate": 2.9454961832061066e-05,
1708
+ "loss": 0.5622,
1709
+ "step": 21900
1710
+ },
1711
+ {
1712
+ "epoch": 31.65,
1713
+ "learning_rate": 2.922595419847328e-05,
1714
+ "loss": 0.5555,
1715
+ "step": 22000
1716
+ },
1717
+ {
1718
+ "epoch": 31.65,
1719
+ "eval_loss": 0.10311654210090637,
1720
+ "eval_runtime": 56.2781,
1721
+ "eval_samples_per_second": 8.103,
1722
+ "eval_steps_per_second": 1.013,
1723
+ "eval_wer": 0.09272123225611598,
1724
+ "step": 22000
1725
+ },
1726
+ {
1727
+ "epoch": 31.8,
1728
+ "learning_rate": 2.8996946564885493e-05,
1729
+ "loss": 0.5697,
1730
+ "step": 22100
1731
+ },
1732
+ {
1733
+ "epoch": 31.94,
1734
+ "learning_rate": 2.8767938931297706e-05,
1735
+ "loss": 0.5567,
1736
+ "step": 22200
1737
+ },
1738
+ {
1739
+ "epoch": 32.09,
1740
+ "learning_rate": 2.8538931297709923e-05,
1741
+ "loss": 0.5562,
1742
+ "step": 22300
1743
+ },
1744
+ {
1745
+ "epoch": 32.23,
1746
+ "learning_rate": 2.8309923664122133e-05,
1747
+ "loss": 0.5482,
1748
+ "step": 22400
1749
+ },
1750
+ {
1751
+ "epoch": 32.37,
1752
+ "learning_rate": 2.808091603053435e-05,
1753
+ "loss": 0.5389,
1754
+ "step": 22500
1755
+ },
1756
+ {
1757
+ "epoch": 32.37,
1758
+ "eval_loss": 0.09973499178886414,
1759
+ "eval_runtime": 56.482,
1760
+ "eval_samples_per_second": 8.073,
1761
+ "eval_steps_per_second": 1.009,
1762
+ "eval_wer": 0.09000302023557838,
1763
+ "step": 22500
1764
+ },
1765
+ {
1766
+ "epoch": 32.52,
1767
+ "learning_rate": 2.785419847328244e-05,
1768
+ "loss": 0.5398,
1769
+ "step": 22600
1770
+ },
1771
+ {
1772
+ "epoch": 32.66,
1773
+ "learning_rate": 2.7625190839694654e-05,
1774
+ "loss": 0.5284,
1775
+ "step": 22700
1776
+ },
1777
+ {
1778
+ "epoch": 32.8,
1779
+ "learning_rate": 2.739618320610687e-05,
1780
+ "loss": 0.5292,
1781
+ "step": 22800
1782
+ },
1783
+ {
1784
+ "epoch": 32.95,
1785
+ "learning_rate": 2.716717557251908e-05,
1786
+ "loss": 0.5186,
1787
+ "step": 22900
1788
+ },
1789
+ {
1790
+ "epoch": 33.09,
1791
+ "learning_rate": 2.6938167938931298e-05,
1792
+ "loss": 0.5201,
1793
+ "step": 23000
1794
+ },
1795
+ {
1796
+ "epoch": 33.09,
1797
+ "eval_loss": 0.09196200966835022,
1798
+ "eval_runtime": 56.8421,
1799
+ "eval_samples_per_second": 8.022,
1800
+ "eval_steps_per_second": 1.003,
1801
+ "eval_wer": 0.09121111446692842,
1802
+ "step": 23000
1803
+ },
1804
+ {
1805
+ "epoch": 33.24,
1806
+ "learning_rate": 2.6709160305343508e-05,
1807
+ "loss": 0.5205,
1808
+ "step": 23100
1809
+ },
1810
+ {
1811
+ "epoch": 33.38,
1812
+ "learning_rate": 2.648015267175572e-05,
1813
+ "loss": 0.5247,
1814
+ "step": 23200
1815
+ },
1816
+ {
1817
+ "epoch": 33.52,
1818
+ "learning_rate": 2.6251145038167938e-05,
1819
+ "loss": 0.5156,
1820
+ "step": 23300
1821
+ },
1822
+ {
1823
+ "epoch": 33.67,
1824
+ "learning_rate": 2.6022137404580148e-05,
1825
+ "loss": 0.5125,
1826
+ "step": 23400
1827
+ },
1828
+ {
1829
+ "epoch": 33.81,
1830
+ "learning_rate": 2.5793129770992365e-05,
1831
+ "loss": 0.5146,
1832
+ "step": 23500
1833
+ },
1834
+ {
1835
+ "epoch": 33.81,
1836
+ "eval_loss": 0.09293542802333832,
1837
+ "eval_runtime": 57.2076,
1838
+ "eval_samples_per_second": 7.971,
1839
+ "eval_steps_per_second": 0.996,
1840
+ "eval_wer": 0.0946843853820598,
1841
+ "step": 23500
1842
+ },
1843
+ {
1844
+ "epoch": 33.96,
1845
+ "learning_rate": 2.5564122137404575e-05,
1846
+ "loss": 0.5085,
1847
+ "step": 23600
1848
+ },
1849
+ {
1850
+ "epoch": 34.1,
1851
+ "learning_rate": 2.5335114503816792e-05,
1852
+ "loss": 0.5173,
1853
+ "step": 23700
1854
+ },
1855
+ {
1856
+ "epoch": 34.24,
1857
+ "learning_rate": 2.5106106870229002e-05,
1858
+ "loss": 0.5101,
1859
+ "step": 23800
1860
+ },
1861
+ {
1862
+ "epoch": 34.39,
1863
+ "learning_rate": 2.487709923664122e-05,
1864
+ "loss": 0.5056,
1865
+ "step": 23900
1866
+ },
1867
+ {
1868
+ "epoch": 34.53,
1869
+ "learning_rate": 2.4648091603053432e-05,
1870
+ "loss": 0.515,
1871
+ "step": 24000
1872
+ },
1873
+ {
1874
+ "epoch": 34.53,
1875
+ "eval_loss": 0.10004792362451553,
1876
+ "eval_runtime": 56.2386,
1877
+ "eval_samples_per_second": 8.108,
1878
+ "eval_steps_per_second": 1.014,
1879
+ "eval_wer": 0.09528843249773482,
1880
+ "step": 24000
1881
+ },
1882
+ {
1883
+ "epoch": 34.68,
1884
+ "learning_rate": 2.4419083969465646e-05,
1885
+ "loss": 0.5211,
1886
+ "step": 24100
1887
+ },
1888
+ {
1889
+ "epoch": 34.82,
1890
+ "learning_rate": 2.419007633587786e-05,
1891
+ "loss": 0.5065,
1892
+ "step": 24200
1893
+ },
1894
+ {
1895
+ "epoch": 34.96,
1896
+ "learning_rate": 2.3961068702290076e-05,
1897
+ "loss": 0.4992,
1898
+ "step": 24300
1899
+ },
1900
+ {
1901
+ "epoch": 35.11,
1902
+ "learning_rate": 2.3732061068702286e-05,
1903
+ "loss": 0.495,
1904
+ "step": 24400
1905
+ },
1906
+ {
1907
+ "epoch": 35.25,
1908
+ "learning_rate": 2.3503053435114503e-05,
1909
+ "loss": 0.4743,
1910
+ "step": 24500
1911
+ },
1912
+ {
1913
+ "epoch": 35.25,
1914
+ "eval_loss": 0.09216006845235825,
1915
+ "eval_runtime": 56.5494,
1916
+ "eval_samples_per_second": 8.064,
1917
+ "eval_steps_per_second": 1.008,
1918
+ "eval_wer": 0.0892479613409846,
1919
+ "step": 24500
1920
+ },
1921
+ {
1922
+ "epoch": 35.4,
1923
+ "learning_rate": 2.3274045801526713e-05,
1924
+ "loss": 0.4769,
1925
+ "step": 24600
1926
+ },
1927
+ {
1928
+ "epoch": 35.54,
1929
+ "learning_rate": 2.304503816793893e-05,
1930
+ "loss": 0.4774,
1931
+ "step": 24700
1932
+ },
1933
+ {
1934
+ "epoch": 35.68,
1935
+ "learning_rate": 2.28206106870229e-05,
1936
+ "loss": 0.487,
1937
+ "step": 24800
1938
+ },
1939
+ {
1940
+ "epoch": 35.83,
1941
+ "learning_rate": 2.259160305343511e-05,
1942
+ "loss": 0.4797,
1943
+ "step": 24900
1944
+ },
1945
+ {
1946
+ "epoch": 35.97,
1947
+ "learning_rate": 2.2362595419847328e-05,
1948
+ "loss": 0.4707,
1949
+ "step": 25000
1950
+ },
1951
+ {
1952
+ "epoch": 35.97,
1953
+ "eval_loss": 0.08524981886148453,
1954
+ "eval_runtime": 56.8651,
1955
+ "eval_samples_per_second": 8.019,
1956
+ "eval_steps_per_second": 1.002,
1957
+ "eval_wer": 0.08079130172153429,
1958
+ "step": 25000
1959
+ },
1960
+ {
1961
+ "epoch": 36.11,
1962
+ "learning_rate": 2.2133587786259538e-05,
1963
+ "loss": 0.4668,
1964
+ "step": 25100
1965
+ },
1966
+ {
1967
+ "epoch": 36.26,
1968
+ "learning_rate": 2.1904580152671755e-05,
1969
+ "loss": 0.476,
1970
+ "step": 25200
1971
+ },
1972
+ {
1973
+ "epoch": 36.4,
1974
+ "learning_rate": 2.167557251908397e-05,
1975
+ "loss": 0.4609,
1976
+ "step": 25300
1977
+ },
1978
+ {
1979
+ "epoch": 36.55,
1980
+ "learning_rate": 2.1446564885496182e-05,
1981
+ "loss": 0.4605,
1982
+ "step": 25400
1983
+ },
1984
+ {
1985
+ "epoch": 36.69,
1986
+ "learning_rate": 2.1217557251908395e-05,
1987
+ "loss": 0.4456,
1988
+ "step": 25500
1989
+ },
1990
+ {
1991
+ "epoch": 36.69,
1992
+ "eval_loss": 0.08548382669687271,
1993
+ "eval_runtime": 56.4053,
1994
+ "eval_samples_per_second": 8.084,
1995
+ "eval_steps_per_second": 1.011,
1996
+ "eval_wer": 0.07792207792207792,
1997
+ "step": 25500
1998
+ },
1999
+ {
2000
+ "epoch": 36.83,
2001
+ "learning_rate": 2.098854961832061e-05,
2002
+ "loss": 0.4515,
2003
+ "step": 25600
2004
+ },
2005
+ {
2006
+ "epoch": 36.98,
2007
+ "learning_rate": 2.0759541984732822e-05,
2008
+ "loss": 0.4426,
2009
+ "step": 25700
2010
+ },
2011
+ {
2012
+ "epoch": 37.12,
2013
+ "learning_rate": 2.053053435114504e-05,
2014
+ "loss": 0.4491,
2015
+ "step": 25800
2016
+ },
2017
+ {
2018
+ "epoch": 37.27,
2019
+ "learning_rate": 2.030152671755725e-05,
2020
+ "loss": 0.4464,
2021
+ "step": 25900
2022
+ },
2023
+ {
2024
+ "epoch": 37.41,
2025
+ "learning_rate": 2.0072519083969466e-05,
2026
+ "loss": 0.443,
2027
+ "step": 26000
2028
+ },
2029
+ {
2030
+ "epoch": 37.41,
2031
+ "eval_loss": 0.0842847004532814,
2032
+ "eval_runtime": 56.5582,
2033
+ "eval_samples_per_second": 8.062,
2034
+ "eval_steps_per_second": 1.008,
2035
+ "eval_wer": 0.07384475989127152,
2036
+ "step": 26000
2037
+ },
2038
+ {
2039
+ "epoch": 37.55,
2040
+ "learning_rate": 1.9843511450381676e-05,
2041
+ "loss": 0.4381,
2042
+ "step": 26100
2043
+ },
2044
+ {
2045
+ "epoch": 37.7,
2046
+ "learning_rate": 1.9614503816793893e-05,
2047
+ "loss": 0.4311,
2048
+ "step": 26200
2049
+ },
2050
+ {
2051
+ "epoch": 37.84,
2052
+ "learning_rate": 1.9385496183206106e-05,
2053
+ "loss": 0.4413,
2054
+ "step": 26300
2055
+ },
2056
+ {
2057
+ "epoch": 37.98,
2058
+ "learning_rate": 1.915648854961832e-05,
2059
+ "loss": 0.4186,
2060
+ "step": 26400
2061
+ },
2062
+ {
2063
+ "epoch": 38.13,
2064
+ "learning_rate": 1.8927480916030533e-05,
2065
+ "loss": 0.4388,
2066
+ "step": 26500
2067
+ },
2068
+ {
2069
+ "epoch": 38.13,
2070
+ "eval_loss": 0.081607885658741,
2071
+ "eval_runtime": 56.5857,
2072
+ "eval_samples_per_second": 8.059,
2073
+ "eval_steps_per_second": 1.007,
2074
+ "eval_wer": 0.06991845363938387,
2075
+ "step": 26500
2076
+ },
2077
+ {
2078
+ "epoch": 38.27,
2079
+ "learning_rate": 1.8698473282442747e-05,
2080
+ "loss": 0.4181,
2081
+ "step": 26600
2082
+ },
2083
+ {
2084
+ "epoch": 38.42,
2085
+ "learning_rate": 1.846946564885496e-05,
2086
+ "loss": 0.4338,
2087
+ "step": 26700
2088
+ },
2089
+ {
2090
+ "epoch": 38.56,
2091
+ "learning_rate": 1.8240458015267174e-05,
2092
+ "loss": 0.4186,
2093
+ "step": 26800
2094
+ },
2095
+ {
2096
+ "epoch": 38.7,
2097
+ "learning_rate": 1.8011450381679387e-05,
2098
+ "loss": 0.4169,
2099
+ "step": 26900
2100
+ },
2101
+ {
2102
+ "epoch": 38.85,
2103
+ "learning_rate": 1.77824427480916e-05,
2104
+ "loss": 0.4162,
2105
+ "step": 27000
2106
+ },
2107
+ {
2108
+ "epoch": 38.85,
2109
+ "eval_loss": 0.07516241818666458,
2110
+ "eval_runtime": 56.61,
2111
+ "eval_samples_per_second": 8.055,
2112
+ "eval_steps_per_second": 1.007,
2113
+ "eval_wer": 0.06448202959830866,
2114
+ "step": 27000
2115
+ },
2116
+ {
2117
+ "epoch": 38.99,
2118
+ "learning_rate": 1.7553435114503814e-05,
2119
+ "loss": 0.4076,
2120
+ "step": 27100
2121
+ },
2122
+ {
2123
+ "epoch": 39.14,
2124
+ "learning_rate": 1.7324427480916027e-05,
2125
+ "loss": 0.4073,
2126
+ "step": 27200
2127
+ },
2128
+ {
2129
+ "epoch": 39.28,
2130
+ "learning_rate": 1.7095419847328244e-05,
2131
+ "loss": 0.4042,
2132
+ "step": 27300
2133
+ },
2134
+ {
2135
+ "epoch": 39.42,
2136
+ "learning_rate": 1.6866412213740458e-05,
2137
+ "loss": 0.4035,
2138
+ "step": 27400
2139
+ },
2140
+ {
2141
+ "epoch": 39.57,
2142
+ "learning_rate": 1.663740458015267e-05,
2143
+ "loss": 0.3979,
2144
+ "step": 27500
2145
+ },
2146
+ {
2147
+ "epoch": 39.57,
2148
+ "eval_loss": 0.07611743360757828,
2149
+ "eval_runtime": 56.1996,
2150
+ "eval_samples_per_second": 8.114,
2151
+ "eval_steps_per_second": 1.014,
2152
+ "eval_wer": 0.062065841135608577,
2153
+ "step": 27500
2154
+ },
2155
+ {
2156
+ "epoch": 39.71,
2157
+ "learning_rate": 1.6408396946564885e-05,
2158
+ "loss": 0.3923,
2159
+ "step": 27600
2160
+ },
2161
+ {
2162
+ "epoch": 39.86,
2163
+ "learning_rate": 1.6179389312977098e-05,
2164
+ "loss": 0.395,
2165
+ "step": 27700
2166
+ },
2167
+ {
2168
+ "epoch": 40.0,
2169
+ "learning_rate": 1.595038167938931e-05,
2170
+ "loss": 0.3931,
2171
+ "step": 27800
2172
+ },
2173
+ {
2174
+ "epoch": 40.14,
2175
+ "learning_rate": 1.5721374045801525e-05,
2176
+ "loss": 0.3969,
2177
+ "step": 27900
2178
+ },
2179
+ {
2180
+ "epoch": 40.29,
2181
+ "learning_rate": 1.549236641221374e-05,
2182
+ "loss": 0.3889,
2183
+ "step": 28000
2184
+ },
2185
+ {
2186
+ "epoch": 40.29,
2187
+ "eval_loss": 0.07714105397462845,
2188
+ "eval_runtime": 56.3655,
2189
+ "eval_samples_per_second": 8.09,
2190
+ "eval_steps_per_second": 1.011,
2191
+ "eval_wer": 0.06251887647236484,
2192
+ "step": 28000
2193
+ },
2194
+ {
2195
+ "epoch": 40.43,
2196
+ "learning_rate": 1.5263358778625952e-05,
2197
+ "loss": 0.387,
2198
+ "step": 28100
2199
+ },
2200
+ {
2201
+ "epoch": 40.57,
2202
+ "learning_rate": 1.5034351145038167e-05,
2203
+ "loss": 0.3911,
2204
+ "step": 28200
2205
+ },
2206
+ {
2207
+ "epoch": 40.72,
2208
+ "learning_rate": 1.480534351145038e-05,
2209
+ "loss": 0.3865,
2210
+ "step": 28300
2211
+ },
2212
+ {
2213
+ "epoch": 40.86,
2214
+ "learning_rate": 1.4576335877862594e-05,
2215
+ "loss": 0.382,
2216
+ "step": 28400
2217
+ },
2218
+ {
2219
+ "epoch": 41.01,
2220
+ "learning_rate": 1.4347328244274809e-05,
2221
+ "loss": 0.3923,
2222
+ "step": 28500
2223
+ },
2224
+ {
2225
+ "epoch": 41.01,
2226
+ "eval_loss": 0.07554154098033905,
2227
+ "eval_runtime": 57.0575,
2228
+ "eval_samples_per_second": 7.992,
2229
+ "eval_steps_per_second": 0.999,
2230
+ "eval_wer": 0.059800664451827246,
2231
+ "step": 28500
2232
+ },
2233
+ {
2234
+ "epoch": 41.15,
2235
+ "learning_rate": 1.4118320610687023e-05,
2236
+ "loss": 0.3723,
2237
+ "step": 28600
2238
+ },
2239
+ {
2240
+ "epoch": 41.29,
2241
+ "learning_rate": 1.3889312977099236e-05,
2242
+ "loss": 0.3797,
2243
+ "step": 28700
2244
+ },
2245
+ {
2246
+ "epoch": 41.44,
2247
+ "learning_rate": 1.3662595419847327e-05,
2248
+ "loss": 0.3823,
2249
+ "step": 28800
2250
+ },
2251
+ {
2252
+ "epoch": 41.58,
2253
+ "learning_rate": 1.343358778625954e-05,
2254
+ "loss": 0.3691,
2255
+ "step": 28900
2256
+ },
2257
+ {
2258
+ "epoch": 41.73,
2259
+ "learning_rate": 1.3204580152671754e-05,
2260
+ "loss": 0.3693,
2261
+ "step": 29000
2262
+ },
2263
+ {
2264
+ "epoch": 41.73,
2265
+ "eval_loss": 0.07298705726861954,
2266
+ "eval_runtime": 59.4935,
2267
+ "eval_samples_per_second": 7.665,
2268
+ "eval_steps_per_second": 0.958,
2269
+ "eval_wer": 0.05783751132588342,
2270
+ "step": 29000
2271
+ },
2272
+ {
2273
+ "epoch": 41.87,
2274
+ "learning_rate": 1.2975572519083969e-05,
2275
+ "loss": 0.3739,
2276
+ "step": 29100
2277
+ },
2278
+ {
2279
+ "epoch": 42.01,
2280
+ "learning_rate": 1.2746564885496182e-05,
2281
+ "loss": 0.3797,
2282
+ "step": 29200
2283
+ },
2284
+ {
2285
+ "epoch": 42.16,
2286
+ "learning_rate": 1.2517557251908396e-05,
2287
+ "loss": 0.3704,
2288
+ "step": 29300
2289
+ },
2290
+ {
2291
+ "epoch": 42.3,
2292
+ "learning_rate": 1.2288549618320609e-05,
2293
+ "loss": 0.3702,
2294
+ "step": 29400
2295
+ },
2296
+ {
2297
+ "epoch": 42.45,
2298
+ "learning_rate": 1.2059541984732823e-05,
2299
+ "loss": 0.3642,
2300
+ "step": 29500
2301
+ },
2302
+ {
2303
+ "epoch": 42.45,
2304
+ "eval_loss": 0.07388342171907425,
2305
+ "eval_runtime": 56.6481,
2306
+ "eval_samples_per_second": 8.05,
2307
+ "eval_steps_per_second": 1.006,
2308
+ "eval_wer": 0.059800664451827246,
2309
+ "step": 29500
2310
+ },
2311
+ {
2312
+ "epoch": 42.59,
2313
+ "learning_rate": 1.1830534351145038e-05,
2314
+ "loss": 0.3665,
2315
+ "step": 29600
2316
+ },
2317
+ {
2318
+ "epoch": 42.73,
2319
+ "learning_rate": 1.1601526717557251e-05,
2320
+ "loss": 0.3626,
2321
+ "step": 29700
2322
+ },
2323
+ {
2324
+ "epoch": 42.88,
2325
+ "learning_rate": 1.1372519083969465e-05,
2326
+ "loss": 0.3499,
2327
+ "step": 29800
2328
+ },
2329
+ {
2330
+ "epoch": 43.02,
2331
+ "learning_rate": 1.1143511450381678e-05,
2332
+ "loss": 0.3632,
2333
+ "step": 29900
2334
+ },
2335
+ {
2336
+ "epoch": 43.17,
2337
+ "learning_rate": 1.0914503816793893e-05,
2338
+ "loss": 0.3532,
2339
+ "step": 30000
2340
+ },
2341
+ {
2342
+ "epoch": 43.17,
2343
+ "eval_loss": 0.07121992111206055,
2344
+ "eval_runtime": 57.0783,
2345
+ "eval_samples_per_second": 7.989,
2346
+ "eval_steps_per_second": 0.999,
2347
+ "eval_wer": 0.05527031108426457,
2348
+ "step": 30000
2349
+ },
2350
+ {
2351
+ "epoch": 43.31,
2352
+ "learning_rate": 1.0685496183206107e-05,
2353
+ "loss": 0.3531,
2354
+ "step": 30100
2355
+ },
2356
+ {
2357
+ "epoch": 43.45,
2358
+ "learning_rate": 1.045648854961832e-05,
2359
+ "loss": 0.361,
2360
+ "step": 30200
2361
+ },
2362
+ {
2363
+ "epoch": 43.6,
2364
+ "learning_rate": 1.0227480916030534e-05,
2365
+ "loss": 0.3508,
2366
+ "step": 30300
2367
+ },
2368
+ {
2369
+ "epoch": 43.74,
2370
+ "learning_rate": 9.998473282442747e-06,
2371
+ "loss": 0.352,
2372
+ "step": 30400
2373
+ },
2374
+ {
2375
+ "epoch": 43.88,
2376
+ "learning_rate": 9.769465648854962e-06,
2377
+ "loss": 0.3513,
2378
+ "step": 30500
2379
+ },
2380
+ {
2381
+ "epoch": 43.88,
2382
+ "eval_loss": 0.07616806775331497,
2383
+ "eval_runtime": 56.3541,
2384
+ "eval_samples_per_second": 8.092,
2385
+ "eval_steps_per_second": 1.011,
2386
+ "eval_wer": 0.051646028390214434,
2387
+ "step": 30500
2388
+ },
2389
+ {
2390
+ "epoch": 44.03,
2391
+ "learning_rate": 9.540458015267176e-06,
2392
+ "loss": 0.3447,
2393
+ "step": 30600
2394
+ },
2395
+ {
2396
+ "epoch": 44.17,
2397
+ "learning_rate": 9.311450381679389e-06,
2398
+ "loss": 0.3463,
2399
+ "step": 30700
2400
+ },
2401
+ {
2402
+ "epoch": 44.32,
2403
+ "learning_rate": 9.082442748091603e-06,
2404
+ "loss": 0.3373,
2405
+ "step": 30800
2406
+ },
2407
+ {
2408
+ "epoch": 44.46,
2409
+ "learning_rate": 8.855725190839693e-06,
2410
+ "loss": 0.3419,
2411
+ "step": 30900
2412
+ },
2413
+ {
2414
+ "epoch": 44.6,
2415
+ "learning_rate": 8.626717557251907e-06,
2416
+ "loss": 0.3349,
2417
+ "step": 31000
2418
+ },
2419
+ {
2420
+ "epoch": 44.6,
2421
+ "eval_loss": 0.07309506088495255,
2422
+ "eval_runtime": 56.1703,
2423
+ "eval_samples_per_second": 8.118,
2424
+ "eval_steps_per_second": 1.015,
2425
+ "eval_wer": 0.05043793415886439,
2426
+ "step": 31000
2427
+ },
2428
+ {
2429
+ "epoch": 44.75,
2430
+ "learning_rate": 8.397709923664122e-06,
2431
+ "loss": 0.3352,
2432
+ "step": 31100
2433
+ },
2434
+ {
2435
+ "epoch": 44.89,
2436
+ "learning_rate": 8.168702290076335e-06,
2437
+ "loss": 0.3376,
2438
+ "step": 31200
2439
+ },
2440
+ {
2441
+ "epoch": 45.04,
2442
+ "learning_rate": 7.939694656488549e-06,
2443
+ "loss": 0.3361,
2444
+ "step": 31300
2445
+ },
2446
+ {
2447
+ "epoch": 45.18,
2448
+ "learning_rate": 7.710687022900762e-06,
2449
+ "loss": 0.3318,
2450
+ "step": 31400
2451
+ },
2452
+ {
2453
+ "epoch": 45.32,
2454
+ "learning_rate": 7.4816793893129765e-06,
2455
+ "loss": 0.3305,
2456
+ "step": 31500
2457
+ },
2458
+ {
2459
+ "epoch": 45.32,
2460
+ "eval_loss": 0.0724666640162468,
2461
+ "eval_runtime": 58.1784,
2462
+ "eval_samples_per_second": 7.838,
2463
+ "eval_steps_per_second": 0.98,
2464
+ "eval_wer": 0.0507399577167019,
2465
+ "step": 31500
2466
+ },
2467
+ {
2468
+ "epoch": 45.47,
2469
+ "learning_rate": 7.25267175572519e-06,
2470
+ "loss": 0.3218,
2471
+ "step": 31600
2472
+ },
2473
+ {
2474
+ "epoch": 45.61,
2475
+ "learning_rate": 7.0259541984732824e-06,
2476
+ "loss": 0.3254,
2477
+ "step": 31700
2478
+ },
2479
+ {
2480
+ "epoch": 45.75,
2481
+ "learning_rate": 6.799236641221373e-06,
2482
+ "loss": 0.3256,
2483
+ "step": 31800
2484
+ },
2485
+ {
2486
+ "epoch": 45.9,
2487
+ "learning_rate": 6.570229007633587e-06,
2488
+ "loss": 0.3198,
2489
+ "step": 31900
2490
+ },
2491
+ {
2492
+ "epoch": 46.04,
2493
+ "learning_rate": 6.341221374045801e-06,
2494
+ "loss": 0.3285,
2495
+ "step": 32000
2496
+ },
2497
+ {
2498
+ "epoch": 46.04,
2499
+ "eval_loss": 0.07090699672698975,
2500
+ "eval_runtime": 59.4451,
2501
+ "eval_samples_per_second": 7.671,
2502
+ "eval_steps_per_second": 0.959,
2503
+ "eval_wer": 0.04892781636967684,
2504
+ "step": 32000
2505
+ },
2506
+ {
2507
+ "epoch": 46.19,
2508
+ "learning_rate": 6.112213740458014e-06,
2509
+ "loss": 0.323,
2510
+ "step": 32100
2511
+ },
2512
+ {
2513
+ "epoch": 46.33,
2514
+ "learning_rate": 5.883206106870229e-06,
2515
+ "loss": 0.3175,
2516
+ "step": 32200
2517
+ },
2518
+ {
2519
+ "epoch": 46.47,
2520
+ "learning_rate": 5.654198473282442e-06,
2521
+ "loss": 0.3191,
2522
+ "step": 32300
2523
+ },
2524
+ {
2525
+ "epoch": 46.62,
2526
+ "learning_rate": 5.425190839694656e-06,
2527
+ "loss": 0.313,
2528
+ "step": 32400
2529
+ },
2530
+ {
2531
+ "epoch": 46.76,
2532
+ "learning_rate": 5.19618320610687e-06,
2533
+ "loss": 0.3179,
2534
+ "step": 32500
2535
+ },
2536
+ {
2537
+ "epoch": 46.76,
2538
+ "eval_loss": 0.06665544956922531,
2539
+ "eval_runtime": 56.3776,
2540
+ "eval_samples_per_second": 8.088,
2541
+ "eval_steps_per_second": 1.011,
2542
+ "eval_wer": 0.0466626396858955,
2543
+ "step": 32500
2544
+ },
2545
+ {
2546
+ "epoch": 46.91,
2547
+ "learning_rate": 4.967175572519083e-06,
2548
+ "loss": 0.316,
2549
+ "step": 32600
2550
+ },
2551
+ {
2552
+ "epoch": 47.05,
2553
+ "learning_rate": 4.738167938931298e-06,
2554
+ "loss": 0.3107,
2555
+ "step": 32700
2556
+ },
2557
+ {
2558
+ "epoch": 47.19,
2559
+ "learning_rate": 4.509160305343511e-06,
2560
+ "loss": 0.3094,
2561
+ "step": 32800
2562
+ },
2563
+ {
2564
+ "epoch": 47.34,
2565
+ "learning_rate": 4.2801526717557246e-06,
2566
+ "loss": 0.3161,
2567
+ "step": 32900
2568
+ },
2569
+ {
2570
+ "epoch": 47.48,
2571
+ "learning_rate": 4.051145038167939e-06,
2572
+ "loss": 0.3158,
2573
+ "step": 33000
2574
+ },
2575
+ {
2576
+ "epoch": 47.48,
2577
+ "eval_loss": 0.06528327614068985,
2578
+ "eval_runtime": 56.0724,
2579
+ "eval_samples_per_second": 8.132,
2580
+ "eval_steps_per_second": 1.017,
2581
+ "eval_wer": 0.0493808517064331,
2582
+ "step": 33000
2583
+ },
2584
+ {
2585
+ "epoch": 47.63,
2586
+ "learning_rate": 3.822137404580152e-06,
2587
+ "loss": 0.3153,
2588
+ "step": 33100
2589
+ },
2590
+ {
2591
+ "epoch": 47.77,
2592
+ "learning_rate": 3.593129770992366e-06,
2593
+ "loss": 0.3005,
2594
+ "step": 33200
2595
+ },
2596
+ {
2597
+ "epoch": 47.91,
2598
+ "learning_rate": 3.36412213740458e-06,
2599
+ "loss": 0.3089,
2600
+ "step": 33300
2601
+ },
2602
+ {
2603
+ "epoch": 48.06,
2604
+ "learning_rate": 3.135114503816794e-06,
2605
+ "loss": 0.3068,
2606
+ "step": 33400
2607
+ },
2608
+ {
2609
+ "epoch": 48.2,
2610
+ "learning_rate": 2.906106870229007e-06,
2611
+ "loss": 0.3033,
2612
+ "step": 33500
2613
+ },
2614
+ {
2615
+ "epoch": 48.2,
2616
+ "eval_loss": 0.06382497400045395,
2617
+ "eval_runtime": 57.4782,
2618
+ "eval_samples_per_second": 7.933,
2619
+ "eval_steps_per_second": 0.992,
2620
+ "eval_wer": 0.04560555723346421,
2621
+ "step": 33500
2622
+ },
2623
+ {
2624
+ "epoch": 48.34,
2625
+ "learning_rate": 2.677099236641221e-06,
2626
+ "loss": 0.3039,
2627
+ "step": 33600
2628
+ },
2629
+ {
2630
+ "epoch": 48.49,
2631
+ "learning_rate": 2.4480916030534347e-06,
2632
+ "loss": 0.2983,
2633
+ "step": 33700
2634
+ },
2635
+ {
2636
+ "epoch": 48.63,
2637
+ "learning_rate": 2.219083969465649e-06,
2638
+ "loss": 0.3069,
2639
+ "step": 33800
2640
+ },
2641
+ {
2642
+ "epoch": 48.78,
2643
+ "learning_rate": 1.9900763358778624e-06,
2644
+ "loss": 0.2987,
2645
+ "step": 33900
2646
+ },
2647
+ {
2648
+ "epoch": 48.92,
2649
+ "learning_rate": 1.761068702290076e-06,
2650
+ "loss": 0.3023,
2651
+ "step": 34000
2652
+ },
2653
+ {
2654
+ "epoch": 48.92,
2655
+ "eval_loss": 0.06439350545406342,
2656
+ "eval_runtime": 56.5994,
2657
+ "eval_samples_per_second": 8.057,
2658
+ "eval_steps_per_second": 1.007,
2659
+ "eval_wer": 0.04636061612805799,
2660
+ "step": 34000
2661
+ },
2662
+ {
2663
+ "epoch": 49.06,
2664
+ "learning_rate": 1.53206106870229e-06,
2665
+ "loss": 0.2919,
2666
+ "step": 34100
2667
+ },
2668
+ {
2669
+ "epoch": 49.21,
2670
+ "learning_rate": 1.3030534351145036e-06,
2671
+ "loss": 0.2912,
2672
+ "step": 34200
2673
+ },
2674
+ {
2675
+ "epoch": 49.35,
2676
+ "learning_rate": 1.0740458015267175e-06,
2677
+ "loss": 0.3036,
2678
+ "step": 34300
2679
+ },
2680
+ {
2681
+ "epoch": 49.5,
2682
+ "learning_rate": 8.450381679389312e-07,
2683
+ "loss": 0.3025,
2684
+ "step": 34400
2685
+ },
2686
+ {
2687
+ "epoch": 49.64,
2688
+ "learning_rate": 6.160305343511449e-07,
2689
+ "loss": 0.2975,
2690
+ "step": 34500
2691
+ },
2692
+ {
2693
+ "epoch": 49.64,
2694
+ "eval_loss": 0.06428828090429306,
2695
+ "eval_runtime": 56.8585,
2696
+ "eval_samples_per_second": 8.02,
2697
+ "eval_steps_per_second": 1.002,
2698
+ "eval_wer": 0.045454545454545456,
2699
+ "step": 34500
2700
+ },
2701
+ {
2702
+ "epoch": 49.78,
2703
+ "learning_rate": 3.8702290076335876e-07,
2704
+ "loss": 0.2955,
2705
+ "step": 34600
2706
+ },
2707
+ {
2708
+ "epoch": 49.93,
2709
+ "learning_rate": 1.580152671755725e-07,
2710
+ "loss": 0.2952,
2711
+ "step": 34700
2712
+ },
2713
+ {
2714
+ "epoch": 50.0,
2715
+ "step": 34750,
2716
+ "total_flos": 8.643412752890073e+20,
2717
+ "train_loss": 1.0736439298451375,
2718
+ "train_runtime": 194988.743,
2719
+ "train_samples_per_second": 5.709,
2720
+ "train_steps_per_second": 0.178
2721
+ }
2722
+ ],
2723
+ "max_steps": 34750,
2724
+ "num_train_epochs": 50,
2725
+ "total_flos": 8.643412752890073e+20,
2726
+ "trial_name": null,
2727
+ "trial_params": null
2728
+ }