w11wo commited on
Commit
b8d7f07
1 Parent(s): 2be5bf6

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "eval_cer": 0.09533931664304834,
4
+ "eval_loss": 0.20887407660484314,
5
+ "eval_runtime": 41.7123,
6
+ "eval_samples": 456,
7
+ "eval_samples_per_second": 10.932,
8
+ "eval_steps_per_second": 1.367,
9
+ "eval_wer": 0.2953790395650861,
10
+ "train_loss": 2.2316733406121783,
11
+ "train_runtime": 114311.9751,
12
+ "train_samples": 22262,
13
+ "train_samples_per_second": 9.737,
14
+ "train_steps_per_second": 0.304
15
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "eval_cer": 0.09533931664304834,
4
+ "eval_loss": 0.20887407660484314,
5
+ "eval_runtime": 41.7123,
6
+ "eval_samples": 456,
7
+ "eval_samples_per_second": 10.932,
8
+ "eval_steps_per_second": 1.367,
9
+ "eval_wer": 0.2953790395650861
10
+ }
nohup.out CHANGED
@@ -5718,3 +5718,26 @@ The progress bars may be unreliable.
5718
 
5719
  Dropping the following result as it does not have all the necessary fields:
5720
  {'dataset': {'name': 'zeroth_korean_asr', 'type': 'zeroth_korean_asr', 'args': 'clean'}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5721
  0%| | 0/57 [00:00<?, ?it/s]
5722
  4%|▎ | 2/57 [00:00<00:20, 2.74it/s]
5723
  5%|▌ | 3/57 [00:01<00:25, 2.08it/s]
5724
  7%|▋ | 4/57 [00:02<00:29, 1.81it/s]
5725
  9%|▉ | 5/57 [00:02<00:28, 1.83it/s]
5726
  11%|█ | 6/57 [00:03<00:29, 1.75it/s]
5727
  12%|█▏ | 7/57 [00:03<00:28, 1.74it/s]
5728
  14%|█▍ | 8/57 [00:04<00:28, 1.70it/s]
5729
  16%|█▌ | 9/57 [00:04<00:27, 1.72it/s]
5730
  18%|█▊ | 10/57 [00:05<00:27, 1.73it/s]
5731
  19%|█▉ | 11/57 [00:06<00:27, 1.66it/s]
5732
  21%|██ | 12/57 [00:06<00:29, 1.53it/s]
5733
  23%|██▎ | 13/57 [00:07<00:30, 1.42it/s]
5734
  25%|██▍ | 14/57 [00:08<00:29, 1.45it/s]
5735
  26%|██▋ | 15/57 [00:09<00:31, 1.35it/s]
5736
  28%|██▊ | 16/57 [00:09<00:28, 1.44it/s]
5737
  30%|██▉ | 17/57 [00:10<00:26, 1.52it/s]
5738
  32%|███▏ | 18/57 [00:11<00:24, 1.60it/s]
5739
  33%|███▎ | 19/57 [00:11<00:22, 1.66it/s]
5740
  35%|███▌ | 20/57 [00:12<00:22, 1.65it/s]
5741
  37%|███▋ | 21/57 [00:12<00:21, 1.64it/s]
5742
  39%|███▊ | 22/57 [00:13<00:23, 1.52it/s]
5743
  40%|████ | 23/57 [00:14<00:24, 1.40it/s]
5744
  42%|████▏ | 24/57 [00:15<00:23, 1.41it/s]
5745
  44%|████▍ | 25/57 [00:15<00:21, 1.46it/s]
5746
  46%|████▌ | 26/57 [00:16<00:20, 1.54it/s]
5747
  47%|████▋ | 27/57 [00:16<00:18, 1.64it/s]
5748
  49%|████▉ | 28/57 [00:17<00:18, 1.57it/s]
5749
  51%|█████ | 29/57 [00:18<00:18, 1.54it/s]
5750
  53%|█████▎ | 30/57 [00:18<00:16, 1.67it/s]
5751
  54%|█████▍ | 31/57 [00:19<00:14, 1.81it/s]
5752
  56%|█████▌ | 32/57 [00:19<00:14, 1.76it/s]
5753
  58%|█████▊ | 33/57 [00:20<00:14, 1.65it/s]
5754
  60%|█████▉ | 34/57 [00:21<00:14, 1.62it/s]
5755
  61%|██████▏ | 35/57 [00:21<00:14, 1.56it/s]
5756
  63%|██████▎ | 36/57 [00:22<00:13, 1.57it/s]
5757
  65%|██████▍ | 37/57 [00:23<00:13, 1.54it/s]
5758
  67%|██████▋ | 38/57 [00:23<00:12, 1.48it/s]
5759
  68%|██████▊ | 39/57 [00:24<00:12, 1.45it/s]
5760
  70%|███████ | 40/57 [00:25<00:11, 1.43it/s]
5761
  72%|███████▏ | 41/57 [00:26<00:11, 1.39it/s]
5762
  74%|███████▎ | 42/57 [00:26<00:11, 1.36it/s]
5763
  75%|███████▌ | 43/57 [00:27<00:09, 1.40it/s]
5764
  77%|███████▋ | 44/57 [00:28<00:09, 1.37it/s]
5765
  79%|███████▉ | 45/57 [00:28<00:07, 1.53it/s]
5766
  81%|████████ | 46/57 [00:29<00:06, 1.58it/s]
5767
  82%|████████▏ | 47/57 [00:30<00:06, 1.52it/s]
5768
  84%|████████▍ | 48/57 [00:30<00:05, 1.60it/s]
5769
  86%|████████▌ | 49/57 [00:31<00:04, 1.64it/s]
5770
  88%|████████▊ | 50/57 [00:31<00:04, 1.64it/s]
5771
  89%|████████▉ | 51/57 [00:32<00:03, 1.65it/s]
5772
  91%|█████████ | 52/57 [00:32<00:02, 1.67it/s]
5773
  93%|█████████▎| 53/57 [00:33<00:02, 1.79it/s]
5774
  95%|█████████▍| 54/57 [00:33<00:01, 1.75it/s]
5775
  96%|█████████▋| 55/57 [00:34<00:01, 1.58it/s]
5776
  98%|█████████▊| 56/57 [00:35<00:00, 1.55it/s]
 
 
 
 
5718
 
5719
  Dropping the following result as it does not have all the necessary fields:
5720
  {'dataset': {'name': 'zeroth_korean_asr', 'type': 'zeroth_korean_asr', 'args': 'clean'}}
5721
+ To https://huggingface.co/w11wo/wav2vec2-xls-r-300m-korean
5722
+ 024e2da..2be5bf6 main -> main
5723
+
5724
+ 02/01/2022 15:05:53 - WARNING - huggingface_hub.repository - To https://huggingface.co/w11wo/wav2vec2-xls-r-300m-korean
5725
+ 024e2da..2be5bf6 main -> main
5726
+
5727
+ ***** train metrics *****
5728
+ epoch = 50.0
5729
+ train_loss = 2.2317
5730
+ train_runtime = 1 day, 7:45:11.97
5731
+ train_samples = 22262
5732
+ train_samples_per_second = 9.737
5733
+ train_steps_per_second = 0.304
5734
+ 02/01/2022 15:05:56 - INFO - __main__ - *** Evaluate ***
5735
+ The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.
5736
+ ***** Running Evaluation *****
5737
+ Num examples = 456
5738
+ Batch size = 8
5739
+
5740
  0%| | 0/57 [00:00<?, ?it/s]
5741
  4%|▎ | 2/57 [00:00<00:20, 2.74it/s]
5742
  5%|▌ | 3/57 [00:01<00:25, 2.08it/s]
5743
  7%|▋ | 4/57 [00:02<00:29, 1.81it/s]
5744
  9%|▉ | 5/57 [00:02<00:28, 1.83it/s]
5745
  11%|█ | 6/57 [00:03<00:29, 1.75it/s]
5746
  12%|█▏ | 7/57 [00:03<00:28, 1.74it/s]
5747
  14%|█▍ | 8/57 [00:04<00:28, 1.70it/s]
5748
  16%|█▌ | 9/57 [00:04<00:27, 1.72it/s]
5749
  18%|█▊ | 10/57 [00:05<00:27, 1.73it/s]
5750
  19%|█▉ | 11/57 [00:06<00:27, 1.66it/s]
5751
  21%|██ | 12/57 [00:06<00:29, 1.53it/s]
5752
  23%|██▎ | 13/57 [00:07<00:30, 1.42it/s]
5753
  25%|██▍ | 14/57 [00:08<00:29, 1.45it/s]
5754
  26%|██▋ | 15/57 [00:09<00:31, 1.35it/s]
5755
  28%|██▊ | 16/57 [00:09<00:28, 1.44it/s]
5756
  30%|██▉ | 17/57 [00:10<00:26, 1.52it/s]
5757
  32%|███▏ | 18/57 [00:11<00:24, 1.60it/s]
5758
  33%|███▎ | 19/57 [00:11<00:22, 1.66it/s]
5759
  35%|███▌ | 20/57 [00:12<00:22, 1.65it/s]
5760
  37%|███▋ | 21/57 [00:12<00:21, 1.64it/s]
5761
  39%|███▊ | 22/57 [00:13<00:23, 1.52it/s]
5762
  40%|████ | 23/57 [00:14<00:24, 1.40it/s]
5763
  42%|████▏ | 24/57 [00:15<00:23, 1.41it/s]
5764
  44%|████▍ | 25/57 [00:15<00:21, 1.46it/s]
5765
  46%|████▌ | 26/57 [00:16<00:20, 1.54it/s]
5766
  47%|████▋ | 27/57 [00:16<00:18, 1.64it/s]
5767
  49%|████▉ | 28/57 [00:17<00:18, 1.57it/s]
5768
  51%|█████ | 29/57 [00:18<00:18, 1.54it/s]
5769
  53%|█████▎ | 30/57 [00:18<00:16, 1.67it/s]
5770
  54%|█████▍ | 31/57 [00:19<00:14, 1.81it/s]
5771
  56%|█████▌ | 32/57 [00:19<00:14, 1.76it/s]
5772
  58%|█████▊ | 33/57 [00:20<00:14, 1.65it/s]
5773
  60%|█████▉ | 34/57 [00:21<00:14, 1.62it/s]
5774
  61%|██████▏ | 35/57 [00:21<00:14, 1.56it/s]
5775
  63%|██████▎ | 36/57 [00:22<00:13, 1.57it/s]
5776
  65%|██████▍ | 37/57 [00:23<00:13, 1.54it/s]
5777
  67%|██████▋ | 38/57 [00:23<00:12, 1.48it/s]
5778
  68%|██████▊ | 39/57 [00:24<00:12, 1.45it/s]
5779
  70%|███████ | 40/57 [00:25<00:11, 1.43it/s]
5780
  72%|███████▏ | 41/57 [00:26<00:11, 1.39it/s]
5781
  74%|███████▎ | 42/57 [00:26<00:11, 1.36it/s]
5782
  75%|███████▌ | 43/57 [00:27<00:09, 1.40it/s]
5783
  77%|███████▋ | 44/57 [00:28<00:09, 1.37it/s]
5784
  79%|███████▉ | 45/57 [00:28<00:07, 1.53it/s]
5785
  81%|████████ | 46/57 [00:29<00:06, 1.58it/s]
5786
  82%|████████▏ | 47/57 [00:30<00:06, 1.52it/s]
5787
  84%|████████▍ | 48/57 [00:30<00:05, 1.60it/s]
5788
  86%|████████▌ | 49/57 [00:31<00:04, 1.64it/s]
5789
  88%|████████▊ | 50/57 [00:31<00:04, 1.64it/s]
5790
  89%|████████▉ | 51/57 [00:32<00:03, 1.65it/s]
5791
  91%|█████████ | 52/57 [00:32<00:02, 1.67it/s]
5792
  93%|█████████▎| 53/57 [00:33<00:02, 1.79it/s]
5793
  95%|█████████▍| 54/57 [00:33<00:01, 1.75it/s]
5794
  96%|█████████▋| 55/57 [00:34<00:01, 1.58it/s]
5795
  98%|█████████▊| 56/57 [00:35<00:00, 1.55it/s]
5796
+ Saving model checkpoint to ./
5797
+ Configuration saved in ./config.json
5798
+ Model weights saved in ./pytorch_model.bin
5799
+ Configuration saved in ./preprocessor_config.json
runs/Jan31_07-15-59_job-2c68f48a-2d5d-4013-9043-3f2cb25f3ff6/events.out.tfevents.1643727998.job-2c68f48a-2d5d-4013-9043-3f2cb25f3ff6.1151936.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fcf7744c50287e982c2028e14e51bc27110121c60f9a7d4ec7fc26a06f86232
3
+ size 412
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 50.0,
3
+ "train_loss": 2.2316733406121783,
4
+ "train_runtime": 114311.9751,
5
+ "train_samples": 22262,
6
+ "train_samples_per_second": 9.737,
7
+ "train_steps_per_second": 0.304
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,2797 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 49.99892202659001,
5
+ "global_step": 34750,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.14,
12
+ "learning_rate": 3.675e-06,
13
+ "loss": 47.2908,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.29,
18
+ "learning_rate": 7.425e-06,
19
+ "loss": 33.9125,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.43,
24
+ "learning_rate": 1.1174999999999999e-05,
25
+ "loss": 26.6068,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 0.57,
30
+ "learning_rate": 1.4925e-05,
31
+ "loss": 23.2775,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 0.72,
36
+ "learning_rate": 1.8675e-05,
37
+ "loss": 19.7138,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 0.72,
42
+ "eval_cer": 1.0,
43
+ "eval_loss": 19.642736434936523,
44
+ "eval_runtime": 41.3907,
45
+ "eval_samples_per_second": 11.017,
46
+ "eval_steps_per_second": 1.377,
47
+ "eval_wer": 1.0,
48
+ "step": 500
49
+ },
50
+ {
51
+ "epoch": 0.86,
52
+ "learning_rate": 2.2424999999999996e-05,
53
+ "loss": 15.7715,
54
+ "step": 600
55
+ },
56
+ {
57
+ "epoch": 1.01,
58
+ "learning_rate": 2.6174999999999996e-05,
59
+ "loss": 11.4061,
60
+ "step": 700
61
+ },
62
+ {
63
+ "epoch": 1.15,
64
+ "learning_rate": 2.9925e-05,
65
+ "loss": 7.4329,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 1.29,
70
+ "learning_rate": 3.3675e-05,
71
+ "loss": 5.3081,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 1.44,
76
+ "learning_rate": 3.7424999999999995e-05,
77
+ "loss": 4.8039,
78
+ "step": 1000
79
+ },
80
+ {
81
+ "epoch": 1.44,
82
+ "eval_cer": 1.0,
83
+ "eval_loss": 4.784187316894531,
84
+ "eval_runtime": 42.2256,
85
+ "eval_samples_per_second": 10.799,
86
+ "eval_steps_per_second": 1.35,
87
+ "eval_wer": 1.0,
88
+ "step": 1000
89
+ },
90
+ {
91
+ "epoch": 1.58,
92
+ "learning_rate": 4.1175e-05,
93
+ "loss": 4.762,
94
+ "step": 1100
95
+ },
96
+ {
97
+ "epoch": 1.73,
98
+ "learning_rate": 4.4924999999999994e-05,
99
+ "loss": 4.6928,
100
+ "step": 1200
101
+ },
102
+ {
103
+ "epoch": 1.87,
104
+ "learning_rate": 4.8675e-05,
105
+ "loss": 4.6292,
106
+ "step": 1300
107
+ },
108
+ {
109
+ "epoch": 2.01,
110
+ "learning_rate": 5.2424999999999994e-05,
111
+ "loss": 4.6321,
112
+ "step": 1400
113
+ },
114
+ {
115
+ "epoch": 2.16,
116
+ "learning_rate": 5.6175e-05,
117
+ "loss": 4.5619,
118
+ "step": 1500
119
+ },
120
+ {
121
+ "epoch": 2.16,
122
+ "eval_cer": 0.9598094788222327,
123
+ "eval_loss": 4.560794830322266,
124
+ "eval_runtime": 41.0352,
125
+ "eval_samples_per_second": 11.112,
126
+ "eval_steps_per_second": 1.389,
127
+ "eval_wer": 0.9992449411054062,
128
+ "step": 1500
129
+ },
130
+ {
131
+ "epoch": 2.3,
132
+ "learning_rate": 5.9925e-05,
133
+ "loss": 4.4704,
134
+ "step": 1600
135
+ },
136
+ {
137
+ "epoch": 2.45,
138
+ "learning_rate": 6.367499999999999e-05,
139
+ "loss": 4.3806,
140
+ "step": 1700
141
+ },
142
+ {
143
+ "epoch": 2.59,
144
+ "learning_rate": 6.7425e-05,
145
+ "loss": 4.3092,
146
+ "step": 1800
147
+ },
148
+ {
149
+ "epoch": 2.73,
150
+ "learning_rate": 7.1175e-05,
151
+ "loss": 4.2794,
152
+ "step": 1900
153
+ },
154
+ {
155
+ "epoch": 2.88,
156
+ "learning_rate": 7.492499999999999e-05,
157
+ "loss": 4.254,
158
+ "step": 2000
159
+ },
160
+ {
161
+ "epoch": 2.88,
162
+ "eval_cer": 0.906274602424815,
163
+ "eval_loss": 4.272861003875732,
164
+ "eval_runtime": 40.8387,
165
+ "eval_samples_per_second": 11.166,
166
+ "eval_steps_per_second": 1.396,
167
+ "eval_wer": 0.9954696466324373,
168
+ "step": 2000
169
+ },
170
+ {
171
+ "epoch": 3.02,
172
+ "learning_rate": 7.477557251908395e-05,
173
+ "loss": 4.2616,
174
+ "step": 2100
175
+ },
176
+ {
177
+ "epoch": 3.17,
178
+ "learning_rate": 7.454656488549618e-05,
179
+ "loss": 4.2184,
180
+ "step": 2200
181
+ },
182
+ {
183
+ "epoch": 3.31,
184
+ "learning_rate": 7.43175572519084e-05,
185
+ "loss": 4.227,
186
+ "step": 2300
187
+ },
188
+ {
189
+ "epoch": 3.45,
190
+ "learning_rate": 7.408854961832061e-05,
191
+ "loss": 4.1985,
192
+ "step": 2400
193
+ },
194
+ {
195
+ "epoch": 3.6,
196
+ "learning_rate": 7.385954198473281e-05,
197
+ "loss": 4.1905,
198
+ "step": 2500
199
+ },
200
+ {
201
+ "epoch": 3.6,
202
+ "eval_cer": 0.8758463234136357,
203
+ "eval_loss": 4.225706100463867,
204
+ "eval_runtime": 40.6017,
205
+ "eval_samples_per_second": 11.231,
206
+ "eval_steps_per_second": 1.404,
207
+ "eval_wer": 0.9903352461491997,
208
+ "step": 2500
209
+ },
210
+ {
211
+ "epoch": 3.74,
212
+ "learning_rate": 7.36328244274809e-05,
213
+ "loss": 4.1873,
214
+ "step": 2600
215
+ },
216
+ {
217
+ "epoch": 3.88,
218
+ "learning_rate": 7.340381679389312e-05,
219
+ "loss": 4.1615,
220
+ "step": 2700
221
+ },
222
+ {
223
+ "epoch": 4.03,
224
+ "learning_rate": 7.317480916030534e-05,
225
+ "loss": 4.157,
226
+ "step": 2800
227
+ },
228
+ {
229
+ "epoch": 4.17,
230
+ "learning_rate": 7.294580152671756e-05,
231
+ "loss": 4.1124,
232
+ "step": 2900
233
+ },
234
+ {
235
+ "epoch": 4.32,
236
+ "learning_rate": 7.271679389312976e-05,
237
+ "loss": 4.0683,
238
+ "step": 3000
239
+ },
240
+ {
241
+ "epoch": 4.32,
242
+ "eval_cer": 0.7911352542906629,
243
+ "eval_loss": 3.929443120956421,
244
+ "eval_runtime": 41.2715,
245
+ "eval_samples_per_second": 11.049,
246
+ "eval_steps_per_second": 1.381,
247
+ "eval_wer": 0.9936575052854123,
248
+ "step": 3000
249
+ },
250
+ {
251
+ "epoch": 4.46,
252
+ "learning_rate": 7.248778625954197e-05,
253
+ "loss": 4.0704,
254
+ "step": 3100
255
+ },
256
+ {
257
+ "epoch": 4.6,
258
+ "learning_rate": 7.22587786259542e-05,
259
+ "loss": 3.9616,
260
+ "step": 3200
261
+ },
262
+ {
263
+ "epoch": 4.75,
264
+ "learning_rate": 7.202977099236641e-05,
265
+ "loss": 3.7798,
266
+ "step": 3300
267
+ },
268
+ {
269
+ "epoch": 4.89,
270
+ "learning_rate": 7.180076335877862e-05,
271
+ "loss": 3.6123,
272
+ "step": 3400
273
+ },
274
+ {
275
+ "epoch": 5.04,
276
+ "learning_rate": 7.15740458015267e-05,
277
+ "loss": 3.486,
278
+ "step": 3500
279
+ },
280
+ {
281
+ "epoch": 5.04,
282
+ "eval_cer": 0.5933711226578492,
283
+ "eval_loss": 2.704545497894287,
284
+ "eval_runtime": 40.8408,
285
+ "eval_samples_per_second": 11.165,
286
+ "eval_steps_per_second": 1.396,
287
+ "eval_wer": 1.0012080942313502,
288
+ "step": 3500
289
+ },
290
+ {
291
+ "epoch": 5.18,
292
+ "learning_rate": 7.134503816793892e-05,
293
+ "loss": 3.3283,
294
+ "step": 3600
295
+ },
296
+ {
297
+ "epoch": 5.32,
298
+ "learning_rate": 7.111603053435114e-05,
299
+ "loss": 3.2091,
300
+ "step": 3700
301
+ },
302
+ {
303
+ "epoch": 5.47,
304
+ "learning_rate": 7.088931297709923e-05,
305
+ "loss": 3.1158,
306
+ "step": 3800
307
+ },
308
+ {
309
+ "epoch": 5.61,
310
+ "learning_rate": 7.066030534351145e-05,
311
+ "loss": 2.9983,
312
+ "step": 3900
313
+ },
314
+ {
315
+ "epoch": 5.75,
316
+ "learning_rate": 7.043129770992365e-05,
317
+ "loss": 2.946,
318
+ "step": 4000
319
+ },
320
+ {
321
+ "epoch": 5.75,
322
+ "eval_cer": 0.4634309557549992,
323
+ "eval_loss": 1.9690674543380737,
324
+ "eval_runtime": 40.879,
325
+ "eval_samples_per_second": 11.155,
326
+ "eval_steps_per_second": 1.394,
327
+ "eval_wer": 0.942464512231954,
328
+ "step": 4000
329
+ },
330
+ {
331
+ "epoch": 5.9,
332
+ "learning_rate": 7.020229007633587e-05,
333
+ "loss": 2.8545,
334
+ "step": 4100
335
+ },
336
+ {
337
+ "epoch": 6.04,
338
+ "learning_rate": 6.997328244274808e-05,
339
+ "loss": 2.8092,
340
+ "step": 4200
341
+ },
342
+ {
343
+ "epoch": 6.19,
344
+ "learning_rate": 6.97442748091603e-05,
345
+ "loss": 2.7229,
346
+ "step": 4300
347
+ },
348
+ {
349
+ "epoch": 6.33,
350
+ "learning_rate": 6.951526717557252e-05,
351
+ "loss": 2.7053,
352
+ "step": 4400
353
+ },
354
+ {
355
+ "epoch": 6.47,
356
+ "learning_rate": 6.928625954198472e-05,
357
+ "loss": 2.634,
358
+ "step": 4500
359
+ },
360
+ {
361
+ "epoch": 6.47,
362
+ "eval_cer": 0.38501810738466385,
363
+ "eval_loss": 1.521231770515442,
364
+ "eval_runtime": 41.5435,
365
+ "eval_samples_per_second": 10.976,
366
+ "eval_steps_per_second": 1.372,
367
+ "eval_wer": 0.880700694654183,
368
+ "step": 4500
369
+ },
370
+ {
371
+ "epoch": 6.62,
372
+ "learning_rate": 6.905725190839693e-05,
373
+ "loss": 2.5996,
374
+ "step": 4600
375
+ },
376
+ {
377
+ "epoch": 6.76,
378
+ "learning_rate": 6.882824427480916e-05,
379
+ "loss": 2.5472,
380
+ "step": 4700
381
+ },
382
+ {
383
+ "epoch": 6.91,
384
+ "learning_rate": 6.859923664122137e-05,
385
+ "loss": 2.4959,
386
+ "step": 4800
387
+ },
388
+ {
389
+ "epoch": 7.05,
390
+ "learning_rate": 6.837022900763359e-05,
391
+ "loss": 2.4554,
392
+ "step": 4900
393
+ },
394
+ {
395
+ "epoch": 7.19,
396
+ "learning_rate": 6.814122137404579e-05,
397
+ "loss": 2.4066,
398
+ "step": 5000
399
+ },
400
+ {
401
+ "epoch": 7.19,
402
+ "eval_cer": 0.36014013541174617,
403
+ "eval_loss": 1.2550952434539795,
404
+ "eval_runtime": 41.0408,
405
+ "eval_samples_per_second": 11.111,
406
+ "eval_steps_per_second": 1.389,
407
+ "eval_wer": 0.8177287828450619,
408
+ "step": 5000
409
+ },
410
+ {
411
+ "epoch": 7.34,
412
+ "learning_rate": 6.791221374045801e-05,
413
+ "loss": 2.3768,
414
+ "step": 5100
415
+ },
416
+ {
417
+ "epoch": 7.48,
418
+ "learning_rate": 6.768320610687023e-05,
419
+ "loss": 2.3557,
420
+ "step": 5200
421
+ },
422
+ {
423
+ "epoch": 7.63,
424
+ "learning_rate": 6.745419847328244e-05,
425
+ "loss": 2.3109,
426
+ "step": 5300
427
+ },
428
+ {
429
+ "epoch": 7.77,
430
+ "learning_rate": 6.722519083969465e-05,
431
+ "loss": 2.2953,
432
+ "step": 5400
433
+ },
434
+ {
435
+ "epoch": 7.91,
436
+ "learning_rate": 6.699618320610687e-05,
437
+ "loss": 2.2651,
438
+ "step": 5500
439
+ },
440
+ {
441
+ "epoch": 7.91,
442
+ "eval_cer": 0.30392851519445757,
443
+ "eval_loss": 1.0423332452774048,
444
+ "eval_runtime": 40.9098,
445
+ "eval_samples_per_second": 11.146,
446
+ "eval_steps_per_second": 1.393,
447
+ "eval_wer": 0.7650256720024162,
448
+ "step": 5500
449
+ },
450
+ {
451
+ "epoch": 8.06,
452
+ "learning_rate": 6.676717557251908e-05,
453
+ "loss": 2.2589,
454
+ "step": 5600
455
+ },
456
+ {
457
+ "epoch": 8.2,
458
+ "learning_rate": 6.654045801526718e-05,
459
+ "loss": 2.2122,
460
+ "step": 5700
461
+ },
462
+ {
463
+ "epoch": 8.34,
464
+ "learning_rate": 6.631145038167939e-05,
465
+ "loss": 2.2017,
466
+ "step": 5800
467
+ },
468
+ {
469
+ "epoch": 8.49,
470
+ "learning_rate": 6.60824427480916e-05,
471
+ "loss": 2.1814,
472
+ "step": 5900
473
+ },
474
+ {
475
+ "epoch": 8.63,
476
+ "learning_rate": 6.58534351145038e-05,
477
+ "loss": 2.1828,
478
+ "step": 6000
479
+ },
480
+ {
481
+ "epoch": 8.63,
482
+ "eval_cer": 0.3106203747441348,
483
+ "eval_loss": 0.9598844051361084,
484
+ "eval_runtime": 41.4743,
485
+ "eval_samples_per_second": 10.995,
486
+ "eval_steps_per_second": 1.374,
487
+ "eval_wer": 0.7272727272727273,
488
+ "step": 6000
489
+ },
490
+ {
491
+ "epoch": 8.78,
492
+ "learning_rate": 6.562442748091603e-05,
493
+ "loss": 2.1714,
494
+ "step": 6100
495
+ },
496
+ {
497
+ "epoch": 8.92,
498
+ "learning_rate": 6.539541984732824e-05,
499
+ "loss": 2.1422,
500
+ "step": 6200
501
+ },
502
+ {
503
+ "epoch": 9.06,
504
+ "learning_rate": 6.516641221374046e-05,
505
+ "loss": 2.1546,
506
+ "step": 6300
507
+ },
508
+ {
509
+ "epoch": 9.21,
510
+ "learning_rate": 6.493740458015267e-05,
511
+ "loss": 2.12,
512
+ "step": 6400
513
+ },
514
+ {
515
+ "epoch": 9.35,
516
+ "learning_rate": 6.470839694656488e-05,
517
+ "loss": 2.1023,
518
+ "step": 6500
519
+ },
520
+ {
521
+ "epoch": 9.35,
522
+ "eval_cer": 0.30632971185640057,
523
+ "eval_loss": 0.9481843113899231,
524
+ "eval_runtime": 41.1867,
525
+ "eval_samples_per_second": 11.072,
526
+ "eval_steps_per_second": 1.384,
527
+ "eval_wer": 0.7160978556327393,
528
+ "step": 6500
529
+ },
530
+ {
531
+ "epoch": 9.5,
532
+ "learning_rate": 6.44793893129771e-05,
533
+ "loss": 2.1104,
534
+ "step": 6600
535
+ },
536
+ {
537
+ "epoch": 9.64,
538
+ "learning_rate": 6.425038167938931e-05,
539
+ "loss": 2.0879,
540
+ "step": 6700
541
+ },
542
+ {
543
+ "epoch": 9.78,
544
+ "learning_rate": 6.402137404580152e-05,
545
+ "loss": 2.0724,
546
+ "step": 6800
547
+ },
548
+ {
549
+ "epoch": 9.93,
550
+ "learning_rate": 6.379236641221374e-05,
551
+ "loss": 2.0622,
552
+ "step": 6900
553
+ },
554
+ {
555
+ "epoch": 10.07,
556
+ "learning_rate": 6.356335877862595e-05,
557
+ "loss": 2.0536,
558
+ "step": 7000
559
+ },
560
+ {
561
+ "epoch": 10.07,
562
+ "eval_cer": 0.28597858604944104,
563
+ "eval_loss": 0.8241907954216003,
564
+ "eval_runtime": 41.2837,
565
+ "eval_samples_per_second": 11.046,
566
+ "eval_steps_per_second": 1.381,
567
+ "eval_wer": 0.6766837813349441,
568
+ "step": 7000
569
+ },
570
+ {
571
+ "epoch": 10.22,
572
+ "learning_rate": 6.333435114503816e-05,
573
+ "loss": 2.0258,
574
+ "step": 7100
575
+ },
576
+ {
577
+ "epoch": 10.36,
578
+ "learning_rate": 6.310534351145038e-05,
579
+ "loss": 2.038,
580
+ "step": 7200
581
+ },
582
+ {
583
+ "epoch": 10.5,
584
+ "learning_rate": 6.287633587786259e-05,
585
+ "loss": 2.0093,
586
+ "step": 7300
587
+ },
588
+ {
589
+ "epoch": 10.65,
590
+ "learning_rate": 6.26473282442748e-05,
591
+ "loss": 1.9839,
592
+ "step": 7400
593
+ },
594
+ {
595
+ "epoch": 10.79,
596
+ "learning_rate": 6.241832061068702e-05,
597
+ "loss": 1.9803,
598
+ "step": 7500
599
+ },
600
+ {
601
+ "epoch": 10.79,
602
+ "eval_cer": 0.2636592662572823,
603
+ "eval_loss": 0.7643126845359802,
604
+ "eval_runtime": 41.3574,
605
+ "eval_samples_per_second": 11.026,
606
+ "eval_steps_per_second": 1.378,
607
+ "eval_wer": 0.6562971911809121,
608
+ "step": 7500
609
+ },
610
+ {
611
+ "epoch": 10.93,
612
+ "learning_rate": 6.218931297709923e-05,
613
+ "loss": 1.9704,
614
+ "step": 7600
615
+ },
616
+ {
617
+ "epoch": 11.08,
618
+ "learning_rate": 6.196030534351144e-05,
619
+ "loss": 1.9923,
620
+ "step": 7700
621
+ },
622
+ {
623
+ "epoch": 11.22,
624
+ "learning_rate": 6.173129770992366e-05,
625
+ "loss": 1.9549,
626
+ "step": 7800
627
+ },
628
+ {
629
+ "epoch": 11.37,
630
+ "learning_rate": 6.150229007633587e-05,
631
+ "loss": 1.9339,
632
+ "step": 7900
633
+ },
634
+ {
635
+ "epoch": 11.51,
636
+ "learning_rate": 6.127328244274808e-05,
637
+ "loss": 1.9468,
638
+ "step": 8000
639
+ },
640
+ {
641
+ "epoch": 11.51,
642
+ "eval_cer": 0.25051173043615177,
643
+ "eval_loss": 0.7318933606147766,
644
+ "eval_runtime": 40.8808,
645
+ "eval_samples_per_second": 11.154,
646
+ "eval_steps_per_second": 1.394,
647
+ "eval_wer": 0.644065237088493,
648
+ "step": 8000
649
+ },
650
+ {
651
+ "epoch": 11.65,
652
+ "learning_rate": 6.10442748091603e-05,
653
+ "loss": 1.9691,
654
+ "step": 8100
655
+ },
656
+ {
657
+ "epoch": 11.8,
658
+ "learning_rate": 6.081526717557252e-05,
659
+ "loss": 1.9845,
660
+ "step": 8200
661
+ },
662
+ {
663
+ "epoch": 11.94,
664
+ "learning_rate": 6.0586259541984725e-05,
665
+ "loss": 1.9561,
666
+ "step": 8300
667
+ },
668
+ {
669
+ "epoch": 12.09,
670
+ "learning_rate": 6.035725190839694e-05,
671
+ "loss": 1.9486,
672
+ "step": 8400
673
+ },
674
+ {
675
+ "epoch": 12.23,
676
+ "learning_rate": 6.012824427480916e-05,
677
+ "loss": 1.9178,
678
+ "step": 8500
679
+ },
680
+ {
681
+ "epoch": 12.23,
682
+ "eval_cer": 0.24893717524799244,
683
+ "eval_loss": 0.6936821937561035,
684
+ "eval_runtime": 41.1829,
685
+ "eval_samples_per_second": 11.073,
686
+ "eval_steps_per_second": 1.384,
687
+ "eval_wer": 0.6319842947749924,
688
+ "step": 8500
689
+ },
690
+ {
691
+ "epoch": 12.37,
692
+ "learning_rate": 5.989923664122137e-05,
693
+ "loss": 1.9133,
694
+ "step": 8600
695
+ },
696
+ {
697
+ "epoch": 12.52,
698
+ "learning_rate": 5.9670229007633586e-05,
699
+ "loss": 1.9327,
700
+ "step": 8700
701
+ },
702
+ {
703
+ "epoch": 12.66,
704
+ "learning_rate": 5.944122137404579e-05,
705
+ "loss": 1.8749,
706
+ "step": 8800
707
+ },
708
+ {
709
+ "epoch": 12.8,
710
+ "learning_rate": 5.9212213740458006e-05,
711
+ "loss": 1.8775,
712
+ "step": 8900
713
+ },
714
+ {
715
+ "epoch": 12.95,
716
+ "learning_rate": 5.8983206106870226e-05,
717
+ "loss": 1.8515,
718
+ "step": 9000
719
+ },
720
+ {
721
+ "epoch": 12.95,
722
+ "eval_cer": 0.21961108486852465,
723
+ "eval_loss": 0.6443303823471069,
724
+ "eval_runtime": 40.5279,
725
+ "eval_samples_per_second": 11.252,
726
+ "eval_steps_per_second": 1.406,
727
+ "eval_wer": 0.6052552099063727,
728
+ "step": 9000
729
+ },
730
+ {
731
+ "epoch": 13.09,
732
+ "learning_rate": 5.875419847328244e-05,
733
+ "loss": 1.8554,
734
+ "step": 9100
735
+ },
736
+ {
737
+ "epoch": 13.24,
738
+ "learning_rate": 5.852519083969465e-05,
739
+ "loss": 1.8568,
740
+ "step": 9200
741
+ },
742
+ {
743
+ "epoch": 13.38,
744
+ "learning_rate": 5.829618320610686e-05,
745
+ "loss": 1.8477,
746
+ "step": 9300
747
+ },
748
+ {
749
+ "epoch": 13.52,
750
+ "learning_rate": 5.806717557251908e-05,
751
+ "loss": 1.8328,
752
+ "step": 9400
753
+ },
754
+ {
755
+ "epoch": 13.67,
756
+ "learning_rate": 5.783816793893129e-05,
757
+ "loss": 1.8083,
758
+ "step": 9500
759
+ },
760
+ {
761
+ "epoch": 13.67,
762
+ "eval_cer": 0.21484805542434263,
763
+ "eval_loss": 0.6285760402679443,
764
+ "eval_runtime": 41.6653,
765
+ "eval_samples_per_second": 10.944,
766
+ "eval_steps_per_second": 1.368,
767
+ "eval_wer": 0.6122017517366355,
768
+ "step": 9500
769
+ },
770
+ {
771
+ "epoch": 13.81,
772
+ "learning_rate": 5.760916030534351e-05,
773
+ "loss": 1.8236,
774
+ "step": 9600
775
+ },
776
+ {
777
+ "epoch": 13.96,
778
+ "learning_rate": 5.738015267175571e-05,
779
+ "loss": 1.8199,
780
+ "step": 9700
781
+ },
782
+ {
783
+ "epoch": 14.1,
784
+ "learning_rate": 5.7151145038167934e-05,
785
+ "loss": 1.8285,
786
+ "step": 9800
787
+ },
788
+ {
789
+ "epoch": 14.24,
790
+ "learning_rate": 5.692213740458015e-05,
791
+ "loss": 1.817,
792
+ "step": 9900
793
+ },
794
+ {
795
+ "epoch": 14.39,
796
+ "learning_rate": 5.669312977099236e-05,
797
+ "loss": 1.819,
798
+ "step": 10000
799
+ },
800
+ {
801
+ "epoch": 14.39,
802
+ "eval_cer": 0.2074476460399937,
803
+ "eval_loss": 0.6015097498893738,
804
+ "eval_runtime": 41.6458,
805
+ "eval_samples_per_second": 10.949,
806
+ "eval_steps_per_second": 1.369,
807
+ "eval_wer": 0.5986106916339474,
808
+ "step": 10000
809
+ },
810
+ {
811
+ "epoch": 14.53,
812
+ "learning_rate": 5.6466412213740455e-05,
813
+ "loss": 1.7952,
814
+ "step": 10100
815
+ },
816
+ {
817
+ "epoch": 14.68,
818
+ "learning_rate": 5.623740458015266e-05,
819
+ "loss": 1.7955,
820
+ "step": 10200
821
+ },
822
+ {
823
+ "epoch": 14.82,
824
+ "learning_rate": 5.600839694656488e-05,
825
+ "loss": 1.7878,
826
+ "step": 10300
827
+ },
828
+ {
829
+ "epoch": 14.96,
830
+ "learning_rate": 5.5779389312977095e-05,
831
+ "loss": 1.769,
832
+ "step": 10400
833
+ },
834
+ {
835
+ "epoch": 15.11,
836
+ "learning_rate": 5.555267175572519e-05,
837
+ "loss": 1.7684,
838
+ "step": 10500
839
+ },
840
+ {
841
+ "epoch": 15.11,
842
+ "eval_cer": 0.19815777042985355,
843
+ "eval_loss": 0.5682193636894226,
844
+ "eval_runtime": 41.2484,
845
+ "eval_samples_per_second": 11.055,
846
+ "eval_steps_per_second": 1.382,
847
+ "eval_wer": 0.5741467834491091,
848
+ "step": 10500
849
+ },
850
+ {
851
+ "epoch": 15.25,
852
+ "learning_rate": 5.53236641221374e-05,
853
+ "loss": 1.7626,
854
+ "step": 10600
855
+ },
856
+ {
857
+ "epoch": 15.4,
858
+ "learning_rate": 5.5094656488549616e-05,
859
+ "loss": 1.7582,
860
+ "step": 10700
861
+ },
862
+ {
863
+ "epoch": 15.54,
864
+ "learning_rate": 5.486564885496182e-05,
865
+ "loss": 1.75,
866
+ "step": 10800
867
+ },
868
+ {
869
+ "epoch": 15.68,
870
+ "learning_rate": 5.463664122137404e-05,
871
+ "loss": 1.735,
872
+ "step": 10900
873
+ },
874
+ {
875
+ "epoch": 15.83,
876
+ "learning_rate": 5.4407633587786256e-05,
877
+ "loss": 1.7195,
878
+ "step": 11000
879
+ },
880
+ {
881
+ "epoch": 15.83,
882
+ "eval_cer": 0.20067705873090852,
883
+ "eval_loss": 0.5385124683380127,
884
+ "eval_runtime": 41.6481,
885
+ "eval_samples_per_second": 10.949,
886
+ "eval_steps_per_second": 1.369,
887
+ "eval_wer": 0.5591966173361522,
888
+ "step": 11000
889
+ },
890
+ {
891
+ "epoch": 15.97,
892
+ "learning_rate": 5.417862595419847e-05,
893
+ "loss": 1.7274,
894
+ "step": 11100
895
+ },
896
+ {
897
+ "epoch": 16.11,
898
+ "learning_rate": 5.3949618320610677e-05,
899
+ "loss": 1.7183,
900
+ "step": 11200
901
+ },
902
+ {
903
+ "epoch": 16.26,
904
+ "learning_rate": 5.37206106870229e-05,
905
+ "loss": 1.7117,
906
+ "step": 11300
907
+ },
908
+ {
909
+ "epoch": 16.4,
910
+ "learning_rate": 5.349160305343511e-05,
911
+ "loss": 1.6918,
912
+ "step": 11400
913
+ },
914
+ {
915
+ "epoch": 16.55,
916
+ "learning_rate": 5.3262595419847324e-05,
917
+ "loss": 1.7044,
918
+ "step": 11500
919
+ },
920
+ {
921
+ "epoch": 16.55,
922
+ "eval_cer": 0.20965202330341678,
923
+ "eval_loss": 0.5361923575401306,
924
+ "eval_runtime": 41.5242,
925
+ "eval_samples_per_second": 10.982,
926
+ "eval_steps_per_second": 1.373,
927
+ "eval_wer": 0.5524010872848082,
928
+ "step": 11500
929
+ },
930
+ {
931
+ "epoch": 16.69,
932
+ "learning_rate": 5.303358778625954e-05,
933
+ "loss": 1.7134,
934
+ "step": 11600
935
+ },
936
+ {
937
+ "epoch": 16.83,
938
+ "learning_rate": 5.280458015267176e-05,
939
+ "loss": 1.7016,
940
+ "step": 11700
941
+ },
942
+ {
943
+ "epoch": 16.98,
944
+ "learning_rate": 5.2575572519083964e-05,
945
+ "loss": 1.7069,
946
+ "step": 11800
947
+ },
948
+ {
949
+ "epoch": 17.12,
950
+ "learning_rate": 5.234656488549618e-05,
951
+ "loss": 1.7046,
952
+ "step": 11900
953
+ },
954
+ {
955
+ "epoch": 17.27,
956
+ "learning_rate": 5.211755725190839e-05,
957
+ "loss": 1.6879,
958
+ "step": 12000
959
+ },
960
+ {
961
+ "epoch": 17.27,
962
+ "eval_cer": 0.20831365139348135,
963
+ "eval_loss": 0.5119141936302185,
964
+ "eval_runtime": 40.4618,
965
+ "eval_samples_per_second": 11.27,
966
+ "eval_steps_per_second": 1.409,
967
+ "eval_wer": 0.5489278163696768,
968
+ "step": 12000
969
+ },
970
+ {
971
+ "epoch": 17.41,
972
+ "learning_rate": 5.188854961832061e-05,
973
+ "loss": 1.681,
974
+ "step": 12100
975
+ },
976
+ {
977
+ "epoch": 17.55,
978
+ "learning_rate": 5.1659541984732825e-05,
979
+ "loss": 1.6683,
980
+ "step": 12200
981
+ },
982
+ {
983
+ "epoch": 17.7,
984
+ "learning_rate": 5.143053435114503e-05,
985
+ "loss": 1.655,
986
+ "step": 12300
987
+ },
988
+ {
989
+ "epoch": 17.84,
990
+ "learning_rate": 5.1201526717557245e-05,
991
+ "loss": 1.6604,
992
+ "step": 12400
993
+ },
994
+ {
995
+ "epoch": 17.98,
996
+ "learning_rate": 5.0972519083969465e-05,
997
+ "loss": 1.656,
998
+ "step": 12500
999
+ },
1000
+ {
1001
+ "epoch": 17.98,
1002
+ "eval_cer": 0.19678003464021415,
1003
+ "eval_loss": 0.4990406930446625,
1004
+ "eval_runtime": 40.5826,
1005
+ "eval_samples_per_second": 11.236,
1006
+ "eval_steps_per_second": 1.405,
1007
+ "eval_wer": 0.5362428269405014,
1008
+ "step": 12500
1009
+ },
1010
+ {
1011
+ "epoch": 18.13,
1012
+ "learning_rate": 5.074351145038168e-05,
1013
+ "loss": 1.6645,
1014
+ "step": 12600
1015
+ },
1016
+ {
1017
+ "epoch": 18.27,
1018
+ "learning_rate": 5.051450381679389e-05,
1019
+ "loss": 1.6269,
1020
+ "step": 12700
1021
+ },
1022
+ {
1023
+ "epoch": 18.42,
1024
+ "learning_rate": 5.02854961832061e-05,
1025
+ "loss": 1.6306,
1026
+ "step": 12800
1027
+ },
1028
+ {
1029
+ "epoch": 18.56,
1030
+ "learning_rate": 5.005877862595419e-05,
1031
+ "loss": 1.6191,
1032
+ "step": 12900
1033
+ },
1034
+ {
1035
+ "epoch": 18.7,
1036
+ "learning_rate": 4.9829770992366406e-05,
1037
+ "loss": 1.6122,
1038
+ "step": 13000
1039
+ },
1040
+ {
1041
+ "epoch": 18.7,
1042
+ "eval_cer": 0.18997008345142496,
1043
+ "eval_loss": 0.45614466071128845,
1044
+ "eval_runtime": 41.2927,
1045
+ "eval_samples_per_second": 11.043,
1046
+ "eval_steps_per_second": 1.38,
1047
+ "eval_wer": 0.5092117185140441,
1048
+ "step": 13000
1049
+ },
1050
+ {
1051
+ "epoch": 18.85,
1052
+ "learning_rate": 4.9600763358778626e-05,
1053
+ "loss": 1.622,
1054
+ "step": 13100
1055
+ },
1056
+ {
1057
+ "epoch": 18.99,
1058
+ "learning_rate": 4.937175572519084e-05,
1059
+ "loss": 1.6305,
1060
+ "step": 13200
1061
+ },
1062
+ {
1063
+ "epoch": 19.14,
1064
+ "learning_rate": 4.9142748091603046e-05,
1065
+ "loss": 1.6134,
1066
+ "step": 13300
1067
+ },
1068
+ {
1069
+ "epoch": 19.28,
1070
+ "learning_rate": 4.891374045801526e-05,
1071
+ "loss": 1.6044,
1072
+ "step": 13400
1073
+ },
1074
+ {
1075
+ "epoch": 19.42,
1076
+ "learning_rate": 4.868473282442748e-05,
1077
+ "loss": 1.5919,
1078
+ "step": 13500
1079
+ },
1080
+ {
1081
+ "epoch": 19.42,
1082
+ "eval_cer": 0.19752794835458984,
1083
+ "eval_loss": 0.47778981924057007,
1084
+ "eval_runtime": 41.5758,
1085
+ "eval_samples_per_second": 10.968,
1086
+ "eval_steps_per_second": 1.371,
1087
+ "eval_wer": 0.5225007550588946,
1088
+ "step": 13500
1089
+ },
1090
+ {
1091
+ "epoch": 19.57,
1092
+ "learning_rate": 4.8455725190839694e-05,
1093
+ "loss": 1.595,
1094
+ "step": 13600
1095
+ },
1096
+ {
1097
+ "epoch": 19.71,
1098
+ "learning_rate": 4.822671755725191e-05,
1099
+ "loss": 1.5959,
1100
+ "step": 13700
1101
+ },
1102
+ {
1103
+ "epoch": 19.86,
1104
+ "learning_rate": 4.7997709923664114e-05,
1105
+ "loss": 1.6006,
1106
+ "step": 13800
1107
+ },
1108
+ {
1109
+ "epoch": 20.0,
1110
+ "learning_rate": 4.7768702290076334e-05,
1111
+ "loss": 1.5913,
1112
+ "step": 13900
1113
+ },
1114
+ {
1115
+ "epoch": 20.14,
1116
+ "learning_rate": 4.753969465648855e-05,
1117
+ "loss": 1.5896,
1118
+ "step": 14000
1119
+ },
1120
+ {
1121
+ "epoch": 20.14,
1122
+ "eval_cer": 0.18591560384191466,
1123
+ "eval_loss": 0.4563109278678894,
1124
+ "eval_runtime": 40.8794,
1125
+ "eval_samples_per_second": 11.155,
1126
+ "eval_steps_per_second": 1.394,
1127
+ "eval_wer": 0.5098157656297191,
1128
+ "step": 14000
1129
+ },
1130
+ {
1131
+ "epoch": 20.29,
1132
+ "learning_rate": 4.731068702290076e-05,
1133
+ "loss": 1.5823,
1134
+ "step": 14100
1135
+ },
1136
+ {
1137
+ "epoch": 20.43,
1138
+ "learning_rate": 4.708167938931297e-05,
1139
+ "loss": 1.5634,
1140
+ "step": 14200
1141
+ },
1142
+ {
1143
+ "epoch": 20.57,
1144
+ "learning_rate": 4.685267175572519e-05,
1145
+ "loss": 1.5573,
1146
+ "step": 14300
1147
+ },
1148
+ {
1149
+ "epoch": 20.72,
1150
+ "learning_rate": 4.66236641221374e-05,
1151
+ "loss": 1.5689,
1152
+ "step": 14400
1153
+ },
1154
+ {
1155
+ "epoch": 20.86,
1156
+ "learning_rate": 4.6394656488549615e-05,
1157
+ "loss": 1.5589,
1158
+ "step": 14500
1159
+ },
1160
+ {
1161
+ "epoch": 20.86,
1162
+ "eval_cer": 0.17249252086285624,
1163
+ "eval_loss": 0.43622052669525146,
1164
+ "eval_runtime": 41.7277,
1165
+ "eval_samples_per_second": 10.928,
1166
+ "eval_steps_per_second": 1.366,
1167
+ "eval_wer": 0.4939595288432498,
1168
+ "step": 14500
1169
+ },
1170
+ {
1171
+ "epoch": 21.01,
1172
+ "learning_rate": 4.616564885496183e-05,
1173
+ "loss": 1.5697,
1174
+ "step": 14600
1175
+ },
1176
+ {
1177
+ "epoch": 21.15,
1178
+ "learning_rate": 4.593664122137405e-05,
1179
+ "loss": 1.5336,
1180
+ "step": 14700
1181
+ },
1182
+ {
1183
+ "epoch": 21.29,
1184
+ "learning_rate": 4.5707633587786255e-05,
1185
+ "loss": 1.5425,
1186
+ "step": 14800
1187
+ },
1188
+ {
1189
+ "epoch": 21.44,
1190
+ "learning_rate": 4.547862595419847e-05,
1191
+ "loss": 1.5461,
1192
+ "step": 14900
1193
+ },
1194
+ {
1195
+ "epoch": 21.58,
1196
+ "learning_rate": 4.524961832061068e-05,
1197
+ "loss": 1.5353,
1198
+ "step": 15000
1199
+ },
1200
+ {
1201
+ "epoch": 21.58,
1202
+ "eval_cer": 0.15804597701149425,
1203
+ "eval_loss": 0.41395294666290283,
1204
+ "eval_runtime": 40.5257,
1205
+ "eval_samples_per_second": 11.252,
1206
+ "eval_steps_per_second": 1.407,
1207
+ "eval_wer": 0.4826336454243431,
1208
+ "step": 15000
1209
+ },
1210
+ {
1211
+ "epoch": 21.73,
1212
+ "learning_rate": 4.5020610687022895e-05,
1213
+ "loss": 1.5348,
1214
+ "step": 15100
1215
+ },
1216
+ {
1217
+ "epoch": 21.87,
1218
+ "learning_rate": 4.4791603053435116e-05,
1219
+ "loss": 1.5279,
1220
+ "step": 15200
1221
+ },
1222
+ {
1223
+ "epoch": 22.01,
1224
+ "learning_rate": 4.456259541984732e-05,
1225
+ "loss": 1.5492,
1226
+ "step": 15300
1227
+ },
1228
+ {
1229
+ "epoch": 22.16,
1230
+ "learning_rate": 4.4333587786259536e-05,
1231
+ "loss": 1.5291,
1232
+ "step": 15400
1233
+ },
1234
+ {
1235
+ "epoch": 22.3,
1236
+ "learning_rate": 4.410458015267175e-05,
1237
+ "loss": 1.5441,
1238
+ "step": 15500
1239
+ },
1240
+ {
1241
+ "epoch": 22.3,
1242
+ "eval_cer": 0.15501495827428752,
1243
+ "eval_loss": 0.40313535928726196,
1244
+ "eval_runtime": 41.0848,
1245
+ "eval_samples_per_second": 11.099,
1246
+ "eval_steps_per_second": 1.387,
1247
+ "eval_wer": 0.47417698580489276,
1248
+ "step": 15500
1249
+ },
1250
+ {
1251
+ "epoch": 22.45,
1252
+ "learning_rate": 4.387557251908397e-05,
1253
+ "loss": 1.518,
1254
+ "step": 15600
1255
+ },
1256
+ {
1257
+ "epoch": 22.59,
1258
+ "learning_rate": 4.364656488549618e-05,
1259
+ "loss": 1.5081,
1260
+ "step": 15700
1261
+ },
1262
+ {
1263
+ "epoch": 22.73,
1264
+ "learning_rate": 4.341755725190839e-05,
1265
+ "loss": 1.4959,
1266
+ "step": 15800
1267
+ },
1268
+ {
1269
+ "epoch": 22.88,
1270
+ "learning_rate": 4.31885496183206e-05,
1271
+ "loss": 1.5097,
1272
+ "step": 15900
1273
+ },
1274
+ {
1275
+ "epoch": 23.02,
1276
+ "learning_rate": 4.295954198473282e-05,
1277
+ "loss": 1.5116,
1278
+ "step": 16000
1279
+ },
1280
+ {
1281
+ "epoch": 23.02,
1282
+ "eval_cer": 0.15450322783813572,
1283
+ "eval_loss": 0.39162585139274597,
1284
+ "eval_runtime": 40.7373,
1285
+ "eval_samples_per_second": 11.194,
1286
+ "eval_steps_per_second": 1.399,
1287
+ "eval_wer": 0.4747810329205678,
1288
+ "step": 16000
1289
+ },
1290
+ {
1291
+ "epoch": 23.17,
1292
+ "learning_rate": 4.273053435114504e-05,
1293
+ "loss": 1.4951,
1294
+ "step": 16100
1295
+ },
1296
+ {
1297
+ "epoch": 23.31,
1298
+ "learning_rate": 4.250152671755724e-05,
1299
+ "loss": 1.4974,
1300
+ "step": 16200
1301
+ },
1302
+ {
1303
+ "epoch": 23.45,
1304
+ "learning_rate": 4.227480916030534e-05,
1305
+ "loss": 1.5045,
1306
+ "step": 16300
1307
+ },
1308
+ {
1309
+ "epoch": 23.6,
1310
+ "learning_rate": 4.204580152671755e-05,
1311
+ "loss": 1.4944,
1312
+ "step": 16400
1313
+ },
1314
+ {
1315
+ "epoch": 23.74,
1316
+ "learning_rate": 4.181679389312977e-05,
1317
+ "loss": 1.4731,
1318
+ "step": 16500
1319
+ },
1320
+ {
1321
+ "epoch": 23.74,
1322
+ "eval_cer": 0.15422768068020784,
1323
+ "eval_loss": 0.3840835392475128,
1324
+ "eval_runtime": 40.8763,
1325
+ "eval_samples_per_second": 11.156,
1326
+ "eval_steps_per_second": 1.394,
1327
+ "eval_wer": 0.4809725158562368,
1328
+ "step": 16500
1329
+ },
1330
+ {
1331
+ "epoch": 23.88,
1332
+ "learning_rate": 4.1587786259541985e-05,
1333
+ "loss": 1.472,
1334
+ "step": 16600
1335
+ },
1336
+ {
1337
+ "epoch": 24.03,
1338
+ "learning_rate": 4.13587786259542e-05,
1339
+ "loss": 1.4847,
1340
+ "step": 16700
1341
+ },
1342
+ {
1343
+ "epoch": 24.17,
1344
+ "learning_rate": 4.1129770992366405e-05,
1345
+ "loss": 1.4603,
1346
+ "step": 16800
1347
+ },
1348
+ {
1349
+ "epoch": 24.32,
1350
+ "learning_rate": 4.090076335877862e-05,
1351
+ "loss": 1.4563,
1352
+ "step": 16900
1353
+ },
1354
+ {
1355
+ "epoch": 24.46,
1356
+ "learning_rate": 4.067175572519084e-05,
1357
+ "loss": 1.4647,
1358
+ "step": 17000
1359
+ },
1360
+ {
1361
+ "epoch": 24.46,
1362
+ "eval_cer": 0.14753582113053063,
1363
+ "eval_loss": 0.37518319487571716,
1364
+ "eval_runtime": 41.0205,
1365
+ "eval_samples_per_second": 11.116,
1366
+ "eval_steps_per_second": 1.39,
1367
+ "eval_wer": 0.452431289640592,
1368
+ "step": 17000
1369
+ },
1370
+ {
1371
+ "epoch": 24.6,
1372
+ "learning_rate": 4.044274809160305e-05,
1373
+ "loss": 1.4585,
1374
+ "step": 17100
1375
+ },
1376
+ {
1377
+ "epoch": 24.75,
1378
+ "learning_rate": 4.021374045801526e-05,
1379
+ "loss": 1.4692,
1380
+ "step": 17200
1381
+ },
1382
+ {
1383
+ "epoch": 24.89,
1384
+ "learning_rate": 3.998473282442747e-05,
1385
+ "loss": 1.444,
1386
+ "step": 17300
1387
+ },
1388
+ {
1389
+ "epoch": 25.04,
1390
+ "learning_rate": 3.975572519083969e-05,
1391
+ "loss": 1.4717,
1392
+ "step": 17400
1393
+ },
1394
+ {
1395
+ "epoch": 25.18,
1396
+ "learning_rate": 3.9526717557251906e-05,
1397
+ "loss": 1.4328,
1398
+ "step": 17500
1399
+ },
1400
+ {
1401
+ "epoch": 25.18,
1402
+ "eval_cer": 0.1461187214611872,
1403
+ "eval_loss": 0.35870596766471863,
1404
+ "eval_runtime": 40.6723,
1405
+ "eval_samples_per_second": 11.212,
1406
+ "eval_steps_per_second": 1.401,
1407
+ "eval_wer": 0.4475989127151918,
1408
+ "step": 17500
1409
+ },
1410
+ {
1411
+ "epoch": 25.32,
1412
+ "learning_rate": 3.929770992366412e-05,
1413
+ "loss": 1.4329,
1414
+ "step": 17600
1415
+ },
1416
+ {
1417
+ "epoch": 25.47,
1418
+ "learning_rate": 3.9068702290076326e-05,
1419
+ "loss": 1.4209,
1420
+ "step": 17700
1421
+ },
1422
+ {
1423
+ "epoch": 25.61,
1424
+ "learning_rate": 3.884198473282442e-05,
1425
+ "loss": 1.4188,
1426
+ "step": 17800
1427
+ },
1428
+ {
1429
+ "epoch": 25.75,
1430
+ "learning_rate": 3.861297709923664e-05,
1431
+ "loss": 1.4301,
1432
+ "step": 17900
1433
+ },
1434
+ {
1435
+ "epoch": 25.9,
1436
+ "learning_rate": 3.8383969465648854e-05,
1437
+ "loss": 1.4129,
1438
+ "step": 18000
1439
+ },
1440
+ {
1441
+ "epoch": 25.9,
1442
+ "eval_cer": 0.13663202645252717,
1443
+ "eval_loss": 0.3428773581981659,
1444
+ "eval_runtime": 42.0192,
1445
+ "eval_samples_per_second": 10.852,
1446
+ "eval_steps_per_second": 1.357,
1447
+ "eval_wer": 0.42419208698278466,
1448
+ "step": 18000
1449
+ },
1450
+ {
1451
+ "epoch": 26.04,
1452
+ "learning_rate": 3.815496183206107e-05,
1453
+ "loss": 1.4266,
1454
+ "step": 18100
1455
+ },
1456
+ {
1457
+ "epoch": 26.19,
1458
+ "learning_rate": 3.7925954198473274e-05,
1459
+ "loss": 1.4166,
1460
+ "step": 18200
1461
+ },
1462
+ {
1463
+ "epoch": 26.33,
1464
+ "learning_rate": 3.7696946564885494e-05,
1465
+ "loss": 1.4157,
1466
+ "step": 18300
1467
+ },
1468
+ {
1469
+ "epoch": 26.47,
1470
+ "learning_rate": 3.746793893129771e-05,
1471
+ "loss": 1.4285,
1472
+ "step": 18400
1473
+ },
1474
+ {
1475
+ "epoch": 26.62,
1476
+ "learning_rate": 3.723893129770992e-05,
1477
+ "loss": 1.4062,
1478
+ "step": 18500
1479
+ },
1480
+ {
1481
+ "epoch": 26.62,
1482
+ "eval_cer": 0.13549047394111163,
1483
+ "eval_loss": 0.34499478340148926,
1484
+ "eval_runtime": 41.0336,
1485
+ "eval_samples_per_second": 11.113,
1486
+ "eval_steps_per_second": 1.389,
1487
+ "eval_wer": 0.4250981576562972,
1488
+ "step": 18500
1489
+ },
1490
+ {
1491
+ "epoch": 26.76,
1492
+ "learning_rate": 3.7009923664122134e-05,
1493
+ "loss": 1.4163,
1494
+ "step": 18600
1495
+ },
1496
+ {
1497
+ "epoch": 26.91,
1498
+ "learning_rate": 3.678091603053435e-05,
1499
+ "loss": 1.404,
1500
+ "step": 18700
1501
+ },
1502
+ {
1503
+ "epoch": 27.05,
1504
+ "learning_rate": 3.655190839694656e-05,
1505
+ "loss": 1.4134,
1506
+ "step": 18800
1507
+ },
1508
+ {
1509
+ "epoch": 27.19,
1510
+ "learning_rate": 3.6322900763358775e-05,
1511
+ "loss": 1.4001,
1512
+ "step": 18900
1513
+ },
1514
+ {
1515
+ "epoch": 27.34,
1516
+ "learning_rate": 3.609389312977099e-05,
1517
+ "loss": 1.3928,
1518
+ "step": 19000
1519
+ },
1520
+ {
1521
+ "epoch": 27.34,
1522
+ "eval_cer": 0.13218390804597702,
1523
+ "eval_loss": 0.32969579100608826,
1524
+ "eval_runtime": 41.0801,
1525
+ "eval_samples_per_second": 11.1,
1526
+ "eval_steps_per_second": 1.388,
1527
+ "eval_wer": 0.4145273331319843,
1528
+ "step": 19000
1529
+ },
1530
+ {
1531
+ "epoch": 27.48,
1532
+ "learning_rate": 3.58648854961832e-05,
1533
+ "loss": 1.3979,
1534
+ "step": 19100
1535
+ },
1536
+ {
1537
+ "epoch": 27.63,
1538
+ "learning_rate": 3.5635877862595415e-05,
1539
+ "loss": 1.3971,
1540
+ "step": 19200
1541
+ },
1542
+ {
1543
+ "epoch": 27.77,
1544
+ "learning_rate": 3.540687022900763e-05,
1545
+ "loss": 1.3934,
1546
+ "step": 19300
1547
+ },
1548
+ {
1549
+ "epoch": 27.91,
1550
+ "learning_rate": 3.517786259541984e-05,
1551
+ "loss": 1.3866,
1552
+ "step": 19400
1553
+ },
1554
+ {
1555
+ "epoch": 28.06,
1556
+ "learning_rate": 3.4948854961832055e-05,
1557
+ "loss": 1.3906,
1558
+ "step": 19500
1559
+ },
1560
+ {
1561
+ "epoch": 28.06,
1562
+ "eval_cer": 0.1336403715950244,
1563
+ "eval_loss": 0.32101842761039734,
1564
+ "eval_runtime": 41.0367,
1565
+ "eval_samples_per_second": 11.112,
1566
+ "eval_steps_per_second": 1.389,
1567
+ "eval_wer": 0.4184536393838719,
1568
+ "step": 19500
1569
+ },
1570
+ {
1571
+ "epoch": 28.2,
1572
+ "learning_rate": 3.471984732824427e-05,
1573
+ "loss": 1.3689,
1574
+ "step": 19600
1575
+ },
1576
+ {
1577
+ "epoch": 28.34,
1578
+ "learning_rate": 3.449083969465649e-05,
1579
+ "loss": 1.3715,
1580
+ "step": 19700
1581
+ },
1582
+ {
1583
+ "epoch": 28.49,
1584
+ "learning_rate": 3.4261832061068696e-05,
1585
+ "loss": 1.3527,
1586
+ "step": 19800
1587
+ },
1588
+ {
1589
+ "epoch": 28.63,
1590
+ "learning_rate": 3.4032824427480916e-05,
1591
+ "loss": 1.3532,
1592
+ "step": 19900
1593
+ },
1594
+ {
1595
+ "epoch": 28.78,
1596
+ "learning_rate": 3.380381679389312e-05,
1597
+ "loss": 1.358,
1598
+ "step": 20000
1599
+ },
1600
+ {
1601
+ "epoch": 28.78,
1602
+ "eval_cer": 0.12753897024090693,
1603
+ "eval_loss": 0.31306591629981995,
1604
+ "eval_runtime": 41.2359,
1605
+ "eval_samples_per_second": 11.058,
1606
+ "eval_steps_per_second": 1.382,
1607
+ "eval_wer": 0.39700996677740863,
1608
+ "step": 20000
1609
+ },
1610
+ {
1611
+ "epoch": 28.92,
1612
+ "learning_rate": 3.357480916030534e-05,
1613
+ "loss": 1.3582,
1614
+ "step": 20100
1615
+ },
1616
+ {
1617
+ "epoch": 29.06,
1618
+ "learning_rate": 3.334580152671755e-05,
1619
+ "loss": 1.3587,
1620
+ "step": 20200
1621
+ },
1622
+ {
1623
+ "epoch": 29.21,
1624
+ "learning_rate": 3.311679389312977e-05,
1625
+ "loss": 1.3392,
1626
+ "step": 20300
1627
+ },
1628
+ {
1629
+ "epoch": 29.35,
1630
+ "learning_rate": 3.288778625954198e-05,
1631
+ "loss": 1.3486,
1632
+ "step": 20400
1633
+ },
1634
+ {
1635
+ "epoch": 29.5,
1636
+ "learning_rate": 3.26587786259542e-05,
1637
+ "loss": 1.3445,
1638
+ "step": 20500
1639
+ },
1640
+ {
1641
+ "epoch": 29.5,
1642
+ "eval_cer": 0.12761769800031492,
1643
+ "eval_loss": 0.3069218099117279,
1644
+ "eval_runtime": 41.0687,
1645
+ "eval_samples_per_second": 11.103,
1646
+ "eval_steps_per_second": 1.388,
1647
+ "eval_wer": 0.3920265780730897,
1648
+ "step": 20500
1649
+ },
1650
+ {
1651
+ "epoch": 29.64,
1652
+ "learning_rate": 3.242977099236641e-05,
1653
+ "loss": 1.3354,
1654
+ "step": 20600
1655
+ },
1656
+ {
1657
+ "epoch": 29.78,
1658
+ "learning_rate": 3.2200763358778624e-05,
1659
+ "loss": 1.3334,
1660
+ "step": 20700
1661
+ },
1662
+ {
1663
+ "epoch": 29.93,
1664
+ "learning_rate": 3.197175572519084e-05,
1665
+ "loss": 1.3305,
1666
+ "step": 20800
1667
+ },
1668
+ {
1669
+ "epoch": 30.07,
1670
+ "learning_rate": 3.174274809160305e-05,
1671
+ "loss": 1.3354,
1672
+ "step": 20900
1673
+ },
1674
+ {
1675
+ "epoch": 30.22,
1676
+ "learning_rate": 3.1513740458015264e-05,
1677
+ "loss": 1.3159,
1678
+ "step": 21000
1679
+ },
1680
+ {
1681
+ "epoch": 30.22,
1682
+ "eval_cer": 0.1254920484962998,
1683
+ "eval_loss": 0.30346596240997314,
1684
+ "eval_runtime": 41.0784,
1685
+ "eval_samples_per_second": 11.101,
1686
+ "eval_steps_per_second": 1.388,
1687
+ "eval_wer": 0.3961038961038961,
1688
+ "step": 21000
1689
+ },
1690
+ {
1691
+ "epoch": 30.36,
1692
+ "learning_rate": 3.128473282442748e-05,
1693
+ "loss": 1.3376,
1694
+ "step": 21100
1695
+ },
1696
+ {
1697
+ "epoch": 30.5,
1698
+ "learning_rate": 3.105572519083969e-05,
1699
+ "loss": 1.324,
1700
+ "step": 21200
1701
+ },
1702
+ {
1703
+ "epoch": 30.65,
1704
+ "learning_rate": 3.0826717557251904e-05,
1705
+ "loss": 1.3091,
1706
+ "step": 21300
1707
+ },
1708
+ {
1709
+ "epoch": 30.79,
1710
+ "learning_rate": 3.059770992366412e-05,
1711
+ "loss": 1.3213,
1712
+ "step": 21400
1713
+ },
1714
+ {
1715
+ "epoch": 30.93,
1716
+ "learning_rate": 3.0368702290076335e-05,
1717
+ "loss": 1.3044,
1718
+ "step": 21500
1719
+ },
1720
+ {
1721
+ "epoch": 30.93,
1722
+ "eval_cer": 0.12423240434577232,
1723
+ "eval_loss": 0.29519879817962646,
1724
+ "eval_runtime": 41.1753,
1725
+ "eval_samples_per_second": 11.075,
1726
+ "eval_steps_per_second": 1.384,
1727
+ "eval_wer": 0.3853820598006645,
1728
+ "step": 21500
1729
+ },
1730
+ {
1731
+ "epoch": 31.08,
1732
+ "learning_rate": 3.0139694656488545e-05,
1733
+ "loss": 1.3033,
1734
+ "step": 21600
1735
+ },
1736
+ {
1737
+ "epoch": 31.22,
1738
+ "learning_rate": 2.991068702290076e-05,
1739
+ "loss": 1.2995,
1740
+ "step": 21700
1741
+ },
1742
+ {
1743
+ "epoch": 31.37,
1744
+ "learning_rate": 2.9681679389312975e-05,
1745
+ "loss": 1.3101,
1746
+ "step": 21800
1747
+ },
1748
+ {
1749
+ "epoch": 31.51,
1750
+ "learning_rate": 2.945267175572519e-05,
1751
+ "loss": 1.304,
1752
+ "step": 21900
1753
+ },
1754
+ {
1755
+ "epoch": 31.65,
1756
+ "learning_rate": 2.9223664122137402e-05,
1757
+ "loss": 1.3034,
1758
+ "step": 22000
1759
+ },
1760
+ {
1761
+ "epoch": 31.65,
1762
+ "eval_cer": 0.12273657691702095,
1763
+ "eval_loss": 0.29660850763320923,
1764
+ "eval_runtime": 41.8912,
1765
+ "eval_samples_per_second": 10.885,
1766
+ "eval_steps_per_second": 1.361,
1767
+ "eval_wer": 0.37722742373905166,
1768
+ "step": 22000
1769
+ },
1770
+ {
1771
+ "epoch": 31.8,
1772
+ "learning_rate": 2.8994656488549615e-05,
1773
+ "loss": 1.2912,
1774
+ "step": 22100
1775
+ },
1776
+ {
1777
+ "epoch": 31.94,
1778
+ "learning_rate": 2.876564885496183e-05,
1779
+ "loss": 1.299,
1780
+ "step": 22200
1781
+ },
1782
+ {
1783
+ "epoch": 32.09,
1784
+ "learning_rate": 2.8536641221374046e-05,
1785
+ "loss": 1.3042,
1786
+ "step": 22300
1787
+ },
1788
+ {
1789
+ "epoch": 32.23,
1790
+ "learning_rate": 2.8307633587786256e-05,
1791
+ "loss": 1.294,
1792
+ "step": 22400
1793
+ },
1794
+ {
1795
+ "epoch": 32.37,
1796
+ "learning_rate": 2.8078625954198472e-05,
1797
+ "loss": 1.2963,
1798
+ "step": 22500
1799
+ },
1800
+ {
1801
+ "epoch": 32.37,
1802
+ "eval_cer": 0.12080774681152574,
1803
+ "eval_loss": 0.2843906879425049,
1804
+ "eval_runtime": 41.7644,
1805
+ "eval_samples_per_second": 10.918,
1806
+ "eval_steps_per_second": 1.365,
1807
+ "eval_wer": 0.3705829054666264,
1808
+ "step": 22500
1809
+ },
1810
+ {
1811
+ "epoch": 32.52,
1812
+ "learning_rate": 2.7849618320610682e-05,
1813
+ "loss": 1.2769,
1814
+ "step": 22600
1815
+ },
1816
+ {
1817
+ "epoch": 32.66,
1818
+ "learning_rate": 2.76206106870229e-05,
1819
+ "loss": 1.2812,
1820
+ "step": 22700
1821
+ },
1822
+ {
1823
+ "epoch": 32.8,
1824
+ "learning_rate": 2.7391603053435113e-05,
1825
+ "loss": 1.2827,
1826
+ "step": 22800
1827
+ },
1828
+ {
1829
+ "epoch": 32.95,
1830
+ "learning_rate": 2.7162595419847326e-05,
1831
+ "loss": 1.2747,
1832
+ "step": 22900
1833
+ },
1834
+ {
1835
+ "epoch": 33.09,
1836
+ "learning_rate": 2.6935877862595417e-05,
1837
+ "loss": 1.2765,
1838
+ "step": 23000
1839
+ },
1840
+ {
1841
+ "epoch": 33.09,
1842
+ "eval_cer": 0.11726499763816722,
1843
+ "eval_loss": 0.28407707810401917,
1844
+ "eval_runtime": 40.9894,
1845
+ "eval_samples_per_second": 11.125,
1846
+ "eval_steps_per_second": 1.391,
1847
+ "eval_wer": 0.35668982180610087,
1848
+ "step": 23000
1849
+ },
1850
+ {
1851
+ "epoch": 33.24,
1852
+ "learning_rate": 2.670687022900763e-05,
1853
+ "loss": 1.2785,
1854
+ "step": 23100
1855
+ },
1856
+ {
1857
+ "epoch": 33.38,
1858
+ "learning_rate": 2.6477862595419844e-05,
1859
+ "loss": 1.2644,
1860
+ "step": 23200
1861
+ },
1862
+ {
1863
+ "epoch": 33.52,
1864
+ "learning_rate": 2.624885496183206e-05,
1865
+ "loss": 1.2724,
1866
+ "step": 23300
1867
+ },
1868
+ {
1869
+ "epoch": 33.67,
1870
+ "learning_rate": 2.601984732824427e-05,
1871
+ "loss": 1.2551,
1872
+ "step": 23400
1873
+ },
1874
+ {
1875
+ "epoch": 33.81,
1876
+ "learning_rate": 2.5790839694656488e-05,
1877
+ "loss": 1.2438,
1878
+ "step": 23500
1879
+ },
1880
+ {
1881
+ "epoch": 33.81,
1882
+ "eval_cer": 0.11372224846480869,
1883
+ "eval_loss": 0.2734295129776001,
1884
+ "eval_runtime": 41.7199,
1885
+ "eval_samples_per_second": 10.93,
1886
+ "eval_steps_per_second": 1.366,
1887
+ "eval_wer": 0.35517970401691334,
1888
+ "step": 23500
1889
+ },
1890
+ {
1891
+ "epoch": 33.96,
1892
+ "learning_rate": 2.5561832061068698e-05,
1893
+ "loss": 1.2491,
1894
+ "step": 23600
1895
+ },
1896
+ {
1897
+ "epoch": 34.1,
1898
+ "learning_rate": 2.5332824427480915e-05,
1899
+ "loss": 1.252,
1900
+ "step": 23700
1901
+ },
1902
+ {
1903
+ "epoch": 34.24,
1904
+ "learning_rate": 2.5103816793893128e-05,
1905
+ "loss": 1.2467,
1906
+ "step": 23800
1907
+ },
1908
+ {
1909
+ "epoch": 34.39,
1910
+ "learning_rate": 2.487480916030534e-05,
1911
+ "loss": 1.2406,
1912
+ "step": 23900
1913
+ },
1914
+ {
1915
+ "epoch": 34.53,
1916
+ "learning_rate": 2.4645801526717555e-05,
1917
+ "loss": 1.2487,
1918
+ "step": 24000
1919
+ },
1920
+ {
1921
+ "epoch": 34.53,
1922
+ "eval_cer": 0.11179341835931349,
1923
+ "eval_loss": 0.2702818512916565,
1924
+ "eval_runtime": 41.8515,
1925
+ "eval_samples_per_second": 10.896,
1926
+ "eval_steps_per_second": 1.362,
1927
+ "eval_wer": 0.3501963153125944,
1928
+ "step": 24000
1929
+ },
1930
+ {
1931
+ "epoch": 34.68,
1932
+ "learning_rate": 2.441679389312977e-05,
1933
+ "loss": 1.2504,
1934
+ "step": 24100
1935
+ },
1936
+ {
1937
+ "epoch": 34.82,
1938
+ "learning_rate": 2.4187786259541982e-05,
1939
+ "loss": 1.2341,
1940
+ "step": 24200
1941
+ },
1942
+ {
1943
+ "epoch": 34.96,
1944
+ "learning_rate": 2.39587786259542e-05,
1945
+ "loss": 1.2477,
1946
+ "step": 24300
1947
+ },
1948
+ {
1949
+ "epoch": 35.11,
1950
+ "learning_rate": 2.372977099236641e-05,
1951
+ "loss": 1.2427,
1952
+ "step": 24400
1953
+ },
1954
+ {
1955
+ "epoch": 35.25,
1956
+ "learning_rate": 2.3500763358778626e-05,
1957
+ "loss": 1.2249,
1958
+ "step": 24500
1959
+ },
1960
+ {
1961
+ "epoch": 35.25,
1962
+ "eval_cer": 0.11423397890096047,
1963
+ "eval_loss": 0.2650163471698761,
1964
+ "eval_runtime": 41.2103,
1965
+ "eval_samples_per_second": 11.065,
1966
+ "eval_steps_per_second": 1.383,
1967
+ "eval_wer": 0.3483841739655693,
1968
+ "step": 24500
1969
+ },
1970
+ {
1971
+ "epoch": 35.4,
1972
+ "learning_rate": 2.3271755725190836e-05,
1973
+ "loss": 1.2265,
1974
+ "step": 24600
1975
+ },
1976
+ {
1977
+ "epoch": 35.54,
1978
+ "learning_rate": 2.3042748091603052e-05,
1979
+ "loss": 1.2276,
1980
+ "step": 24700
1981
+ },
1982
+ {
1983
+ "epoch": 35.68,
1984
+ "learning_rate": 2.2816030534351143e-05,
1985
+ "loss": 1.2332,
1986
+ "step": 24800
1987
+ },
1988
+ {
1989
+ "epoch": 35.83,
1990
+ "learning_rate": 2.2587022900763357e-05,
1991
+ "loss": 1.2249,
1992
+ "step": 24900
1993
+ },
1994
+ {
1995
+ "epoch": 35.97,
1996
+ "learning_rate": 2.235801526717557e-05,
1997
+ "loss": 1.2229,
1998
+ "step": 25000
1999
+ },
2000
+ {
2001
+ "epoch": 35.97,
2002
+ "eval_cer": 0.10970713273500236,
2003
+ "eval_loss": 0.25843024253845215,
2004
+ "eval_runtime": 42.815,
2005
+ "eval_samples_per_second": 10.65,
2006
+ "eval_steps_per_second": 1.331,
2007
+ "eval_wer": 0.3373603141045001,
2008
+ "step": 25000
2009
+ },
2010
+ {
2011
+ "epoch": 36.11,
2012
+ "learning_rate": 2.2129007633587784e-05,
2013
+ "loss": 1.2412,
2014
+ "step": 25100
2015
+ },
2016
+ {
2017
+ "epoch": 36.26,
2018
+ "learning_rate": 2.1899999999999997e-05,
2019
+ "loss": 1.212,
2020
+ "step": 25200
2021
+ },
2022
+ {
2023
+ "epoch": 36.4,
2024
+ "learning_rate": 2.1670992366412214e-05,
2025
+ "loss": 1.2151,
2026
+ "step": 25300
2027
+ },
2028
+ {
2029
+ "epoch": 36.55,
2030
+ "learning_rate": 2.1441984732824424e-05,
2031
+ "loss": 1.2303,
2032
+ "step": 25400
2033
+ },
2034
+ {
2035
+ "epoch": 36.69,
2036
+ "learning_rate": 2.121297709923664e-05,
2037
+ "loss": 1.2374,
2038
+ "step": 25500
2039
+ },
2040
+ {
2041
+ "epoch": 36.69,
2042
+ "eval_cer": 0.10951031333648244,
2043
+ "eval_loss": 0.2568279504776001,
2044
+ "eval_runtime": 41.6839,
2045
+ "eval_samples_per_second": 10.939,
2046
+ "eval_steps_per_second": 1.367,
2047
+ "eval_wer": 0.33373603141045,
2048
+ "step": 25500
2049
+ },
2050
+ {
2051
+ "epoch": 36.83,
2052
+ "learning_rate": 2.098396946564885e-05,
2053
+ "loss": 1.2152,
2054
+ "step": 25600
2055
+ },
2056
+ {
2057
+ "epoch": 36.98,
2058
+ "learning_rate": 2.0754961832061068e-05,
2059
+ "loss": 1.2089,
2060
+ "step": 25700
2061
+ },
2062
+ {
2063
+ "epoch": 37.12,
2064
+ "learning_rate": 2.052595419847328e-05,
2065
+ "loss": 1.2201,
2066
+ "step": 25800
2067
+ },
2068
+ {
2069
+ "epoch": 37.27,
2070
+ "learning_rate": 2.0296946564885495e-05,
2071
+ "loss": 1.2006,
2072
+ "step": 25900
2073
+ },
2074
+ {
2075
+ "epoch": 37.41,
2076
+ "learning_rate": 2.0067938931297708e-05,
2077
+ "loss": 1.2153,
2078
+ "step": 26000
2079
+ },
2080
+ {
2081
+ "epoch": 37.41,
2082
+ "eval_cer": 0.10710911667453944,
2083
+ "eval_loss": 0.24941784143447876,
2084
+ "eval_runtime": 41.3494,
2085
+ "eval_samples_per_second": 11.028,
2086
+ "eval_steps_per_second": 1.378,
2087
+ "eval_wer": 0.33267894895801875,
2088
+ "step": 26000
2089
+ },
2090
+ {
2091
+ "epoch": 37.55,
2092
+ "learning_rate": 1.983893129770992e-05,
2093
+ "loss": 1.2071,
2094
+ "step": 26100
2095
+ },
2096
+ {
2097
+ "epoch": 37.7,
2098
+ "learning_rate": 1.9609923664122135e-05,
2099
+ "loss": 1.2042,
2100
+ "step": 26200
2101
+ },
2102
+ {
2103
+ "epoch": 37.84,
2104
+ "learning_rate": 1.9380916030534352e-05,
2105
+ "loss": 1.2037,
2106
+ "step": 26300
2107
+ },
2108
+ {
2109
+ "epoch": 37.98,
2110
+ "learning_rate": 1.9151908396946562e-05,
2111
+ "loss": 1.1962,
2112
+ "step": 26400
2113
+ },
2114
+ {
2115
+ "epoch": 38.13,
2116
+ "learning_rate": 1.892290076335878e-05,
2117
+ "loss": 1.1925,
2118
+ "step": 26500
2119
+ },
2120
+ {
2121
+ "epoch": 38.13,
2122
+ "eval_cer": 0.1076995748700992,
2123
+ "eval_loss": 0.2518324553966522,
2124
+ "eval_runtime": 40.748,
2125
+ "eval_samples_per_second": 11.191,
2126
+ "eval_steps_per_second": 1.399,
2127
+ "eval_wer": 0.33660525520990636,
2128
+ "step": 26500
2129
+ },
2130
+ {
2131
+ "epoch": 38.27,
2132
+ "learning_rate": 1.869389312977099e-05,
2133
+ "loss": 1.1969,
2134
+ "step": 26600
2135
+ },
2136
+ {
2137
+ "epoch": 38.42,
2138
+ "learning_rate": 1.8464885496183202e-05,
2139
+ "loss": 1.1947,
2140
+ "step": 26700
2141
+ },
2142
+ {
2143
+ "epoch": 38.56,
2144
+ "learning_rate": 1.823587786259542e-05,
2145
+ "loss": 1.2005,
2146
+ "step": 26800
2147
+ },
2148
+ {
2149
+ "epoch": 38.7,
2150
+ "learning_rate": 1.8006870229007632e-05,
2151
+ "loss": 1.1961,
2152
+ "step": 26900
2153
+ },
2154
+ {
2155
+ "epoch": 38.85,
2156
+ "learning_rate": 1.7777862595419846e-05,
2157
+ "loss": 1.1908,
2158
+ "step": 27000
2159
+ },
2160
+ {
2161
+ "epoch": 38.85,
2162
+ "eval_cer": 0.10565265312549205,
2163
+ "eval_loss": 0.24367305636405945,
2164
+ "eval_runtime": 41.2308,
2165
+ "eval_samples_per_second": 11.06,
2166
+ "eval_steps_per_second": 1.382,
2167
+ "eval_wer": 0.3272425249169435,
2168
+ "step": 27000
2169
+ },
2170
+ {
2171
+ "epoch": 38.99,
2172
+ "learning_rate": 1.754885496183206e-05,
2173
+ "loss": 1.1762,
2174
+ "step": 27100
2175
+ },
2176
+ {
2177
+ "epoch": 39.14,
2178
+ "learning_rate": 1.7319847328244273e-05,
2179
+ "loss": 1.2018,
2180
+ "step": 27200
2181
+ },
2182
+ {
2183
+ "epoch": 39.28,
2184
+ "learning_rate": 1.7090839694656486e-05,
2185
+ "loss": 1.1822,
2186
+ "step": 27300
2187
+ },
2188
+ {
2189
+ "epoch": 39.42,
2190
+ "learning_rate": 1.68618320610687e-05,
2191
+ "loss": 1.1745,
2192
+ "step": 27400
2193
+ },
2194
+ {
2195
+ "epoch": 39.57,
2196
+ "learning_rate": 1.6632824427480913e-05,
2197
+ "loss": 1.1858,
2198
+ "step": 27500
2199
+ },
2200
+ {
2201
+ "epoch": 39.57,
2202
+ "eval_cer": 0.10443237285466855,
2203
+ "eval_loss": 0.23960824310779572,
2204
+ "eval_runtime": 42.517,
2205
+ "eval_samples_per_second": 10.725,
2206
+ "eval_steps_per_second": 1.341,
2207
+ "eval_wer": 0.32648746602234974,
2208
+ "step": 27500
2209
+ },
2210
+ {
2211
+ "epoch": 39.71,
2212
+ "learning_rate": 1.6403816793893127e-05,
2213
+ "loss": 1.1866,
2214
+ "step": 27600
2215
+ },
2216
+ {
2217
+ "epoch": 39.86,
2218
+ "learning_rate": 1.617480916030534e-05,
2219
+ "loss": 1.1878,
2220
+ "step": 27700
2221
+ },
2222
+ {
2223
+ "epoch": 40.0,
2224
+ "learning_rate": 1.5945801526717557e-05,
2225
+ "loss": 1.1817,
2226
+ "step": 27800
2227
+ },
2228
+ {
2229
+ "epoch": 40.14,
2230
+ "learning_rate": 1.571679389312977e-05,
2231
+ "loss": 1.1851,
2232
+ "step": 27900
2233
+ },
2234
+ {
2235
+ "epoch": 40.29,
2236
+ "learning_rate": 1.5487786259541984e-05,
2237
+ "loss": 1.1808,
2238
+ "step": 28000
2239
+ },
2240
+ {
2241
+ "epoch": 40.29,
2242
+ "eval_cer": 0.10277908990710125,
2243
+ "eval_loss": 0.2373325228691101,
2244
+ "eval_runtime": 41.3513,
2245
+ "eval_samples_per_second": 11.027,
2246
+ "eval_steps_per_second": 1.378,
2247
+ "eval_wer": 0.31561461794019935,
2248
+ "step": 28000
2249
+ },
2250
+ {
2251
+ "epoch": 40.43,
2252
+ "learning_rate": 1.5258778625954197e-05,
2253
+ "loss": 1.1558,
2254
+ "step": 28100
2255
+ },
2256
+ {
2257
+ "epoch": 40.57,
2258
+ "learning_rate": 1.502977099236641e-05,
2259
+ "loss": 1.1804,
2260
+ "step": 28200
2261
+ },
2262
+ {
2263
+ "epoch": 40.72,
2264
+ "learning_rate": 1.4800763358778624e-05,
2265
+ "loss": 1.1736,
2266
+ "step": 28300
2267
+ },
2268
+ {
2269
+ "epoch": 40.86,
2270
+ "learning_rate": 1.4571755725190838e-05,
2271
+ "loss": 1.1782,
2272
+ "step": 28400
2273
+ },
2274
+ {
2275
+ "epoch": 41.01,
2276
+ "learning_rate": 1.4342748091603053e-05,
2277
+ "loss": 1.1842,
2278
+ "step": 28500
2279
+ },
2280
+ {
2281
+ "epoch": 41.01,
2282
+ "eval_cer": 0.10258227050858132,
2283
+ "eval_loss": 0.23562349379062653,
2284
+ "eval_runtime": 40.492,
2285
+ "eval_samples_per_second": 11.261,
2286
+ "eval_steps_per_second": 1.408,
2287
+ "eval_wer": 0.31516158260344307,
2288
+ "step": 28500
2289
+ },
2290
+ {
2291
+ "epoch": 41.15,
2292
+ "learning_rate": 1.4113740458015266e-05,
2293
+ "loss": 1.1595,
2294
+ "step": 28600
2295
+ },
2296
+ {
2297
+ "epoch": 41.29,
2298
+ "learning_rate": 1.388473282442748e-05,
2299
+ "loss": 1.1527,
2300
+ "step": 28700
2301
+ },
2302
+ {
2303
+ "epoch": 41.44,
2304
+ "learning_rate": 1.3655725190839693e-05,
2305
+ "loss": 1.1517,
2306
+ "step": 28800
2307
+ },
2308
+ {
2309
+ "epoch": 41.58,
2310
+ "learning_rate": 1.3426717557251907e-05,
2311
+ "loss": 1.1609,
2312
+ "step": 28900
2313
+ },
2314
+ {
2315
+ "epoch": 41.73,
2316
+ "learning_rate": 1.3197709923664122e-05,
2317
+ "loss": 1.1668,
2318
+ "step": 29000
2319
+ },
2320
+ {
2321
+ "epoch": 41.73,
2322
+ "eval_cer": 0.10246417886946937,
2323
+ "eval_loss": 0.23187227547168732,
2324
+ "eval_runtime": 40.5813,
2325
+ "eval_samples_per_second": 11.237,
2326
+ "eval_steps_per_second": 1.405,
2327
+ "eval_wer": 0.3187858652974932,
2328
+ "step": 29000
2329
+ },
2330
+ {
2331
+ "epoch": 41.87,
2332
+ "learning_rate": 1.2968702290076335e-05,
2333
+ "loss": 1.1536,
2334
+ "step": 29100
2335
+ },
2336
+ {
2337
+ "epoch": 42.01,
2338
+ "learning_rate": 1.2739694656488549e-05,
2339
+ "loss": 1.1649,
2340
+ "step": 29200
2341
+ },
2342
+ {
2343
+ "epoch": 42.16,
2344
+ "learning_rate": 1.2510687022900762e-05,
2345
+ "loss": 1.1459,
2346
+ "step": 29300
2347
+ },
2348
+ {
2349
+ "epoch": 42.3,
2350
+ "learning_rate": 1.2281679389312975e-05,
2351
+ "loss": 1.1495,
2352
+ "step": 29400
2353
+ },
2354
+ {
2355
+ "epoch": 42.45,
2356
+ "learning_rate": 1.205267175572519e-05,
2357
+ "loss": 1.1448,
2358
+ "step": 29500
2359
+ },
2360
+ {
2361
+ "epoch": 42.45,
2362
+ "eval_cer": 0.09947252401196661,
2363
+ "eval_loss": 0.2292834371328354,
2364
+ "eval_runtime": 41.732,
2365
+ "eval_samples_per_second": 10.927,
2366
+ "eval_steps_per_second": 1.366,
2367
+ "eval_wer": 0.3098761703412866,
2368
+ "step": 29500
2369
+ },
2370
+ {
2371
+ "epoch": 42.59,
2372
+ "learning_rate": 1.1823664122137404e-05,
2373
+ "loss": 1.1408,
2374
+ "step": 29600
2375
+ },
2376
+ {
2377
+ "epoch": 42.73,
2378
+ "learning_rate": 1.1594656488549618e-05,
2379
+ "loss": 1.1458,
2380
+ "step": 29700
2381
+ },
2382
+ {
2383
+ "epoch": 42.88,
2384
+ "learning_rate": 1.1365648854961831e-05,
2385
+ "loss": 1.1358,
2386
+ "step": 29800
2387
+ },
2388
+ {
2389
+ "epoch": 43.02,
2390
+ "learning_rate": 1.1136641221374044e-05,
2391
+ "loss": 1.1519,
2392
+ "step": 29900
2393
+ },
2394
+ {
2395
+ "epoch": 43.17,
2396
+ "learning_rate": 1.0909923664122137e-05,
2397
+ "loss": 1.1327,
2398
+ "step": 30000
2399
+ },
2400
+ {
2401
+ "epoch": 43.17,
2402
+ "eval_cer": 0.09793733270351125,
2403
+ "eval_loss": 0.2265164852142334,
2404
+ "eval_runtime": 40.9338,
2405
+ "eval_samples_per_second": 11.14,
2406
+ "eval_steps_per_second": 1.392,
2407
+ "eval_wer": 0.3047417698580489,
2408
+ "step": 30000
2409
+ },
2410
+ {
2411
+ "epoch": 43.31,
2412
+ "learning_rate": 1.068091603053435e-05,
2413
+ "loss": 1.1322,
2414
+ "step": 30100
2415
+ },
2416
+ {
2417
+ "epoch": 43.45,
2418
+ "learning_rate": 1.0451908396946564e-05,
2419
+ "loss": 1.1392,
2420
+ "step": 30200
2421
+ },
2422
+ {
2423
+ "epoch": 43.6,
2424
+ "learning_rate": 1.0222900763358777e-05,
2425
+ "loss": 1.1318,
2426
+ "step": 30300
2427
+ },
2428
+ {
2429
+ "epoch": 43.74,
2430
+ "learning_rate": 9.99389312977099e-06,
2431
+ "loss": 1.1321,
2432
+ "step": 30400
2433
+ },
2434
+ {
2435
+ "epoch": 43.88,
2436
+ "learning_rate": 9.764885496183206e-06,
2437
+ "loss": 1.1307,
2438
+ "step": 30500
2439
+ },
2440
+ {
2441
+ "epoch": 43.88,
2442
+ "eval_cer": 0.09888206581640686,
2443
+ "eval_loss": 0.22221311926841736,
2444
+ "eval_runtime": 40.9097,
2445
+ "eval_samples_per_second": 11.147,
2446
+ "eval_steps_per_second": 1.393,
2447
+ "eval_wer": 0.30776200543642407,
2448
+ "step": 30500
2449
+ },
2450
+ {
2451
+ "epoch": 44.03,
2452
+ "learning_rate": 9.53587786259542e-06,
2453
+ "loss": 1.1358,
2454
+ "step": 30600
2455
+ },
2456
+ {
2457
+ "epoch": 44.17,
2458
+ "learning_rate": 9.306870229007633e-06,
2459
+ "loss": 1.1342,
2460
+ "step": 30700
2461
+ },
2462
+ {
2463
+ "epoch": 44.32,
2464
+ "learning_rate": 9.077862595419846e-06,
2465
+ "loss": 1.1348,
2466
+ "step": 30800
2467
+ },
2468
+ {
2469
+ "epoch": 44.46,
2470
+ "learning_rate": 8.84885496183206e-06,
2471
+ "loss": 1.1294,
2472
+ "step": 30900
2473
+ },
2474
+ {
2475
+ "epoch": 44.6,
2476
+ "learning_rate": 8.619847328244275e-06,
2477
+ "loss": 1.1419,
2478
+ "step": 31000
2479
+ },
2480
+ {
2481
+ "epoch": 44.6,
2482
+ "eval_cer": 0.09813415210203118,
2483
+ "eval_loss": 0.22149430215358734,
2484
+ "eval_runtime": 40.8027,
2485
+ "eval_samples_per_second": 11.176,
2486
+ "eval_steps_per_second": 1.397,
2487
+ "eval_wer": 0.3038356991845364,
2488
+ "step": 31000
2489
+ },
2490
+ {
2491
+ "epoch": 44.75,
2492
+ "learning_rate": 8.390839694656488e-06,
2493
+ "loss": 1.1191,
2494
+ "step": 31100
2495
+ },
2496
+ {
2497
+ "epoch": 44.89,
2498
+ "learning_rate": 8.161832061068702e-06,
2499
+ "loss": 1.1223,
2500
+ "step": 31200
2501
+ },
2502
+ {
2503
+ "epoch": 45.04,
2504
+ "learning_rate": 7.932824427480915e-06,
2505
+ "loss": 1.1393,
2506
+ "step": 31300
2507
+ },
2508
+ {
2509
+ "epoch": 45.18,
2510
+ "learning_rate": 7.703816793893129e-06,
2511
+ "loss": 1.1172,
2512
+ "step": 31400
2513
+ },
2514
+ {
2515
+ "epoch": 45.32,
2516
+ "learning_rate": 7.474809160305343e-06,
2517
+ "loss": 1.1231,
2518
+ "step": 31500
2519
+ },
2520
+ {
2521
+ "epoch": 45.32,
2522
+ "eval_cer": 0.09722878286883956,
2523
+ "eval_loss": 0.2193477302789688,
2524
+ "eval_runtime": 40.6396,
2525
+ "eval_samples_per_second": 11.221,
2526
+ "eval_steps_per_second": 1.403,
2527
+ "eval_wer": 0.3012684989429176,
2528
+ "step": 31500
2529
+ },
2530
+ {
2531
+ "epoch": 45.47,
2532
+ "learning_rate": 7.245801526717557e-06,
2533
+ "loss": 1.1289,
2534
+ "step": 31600
2535
+ },
2536
+ {
2537
+ "epoch": 45.61,
2538
+ "learning_rate": 7.016793893129771e-06,
2539
+ "loss": 1.1083,
2540
+ "step": 31700
2541
+ },
2542
+ {
2543
+ "epoch": 45.75,
2544
+ "learning_rate": 6.787786259541984e-06,
2545
+ "loss": 1.109,
2546
+ "step": 31800
2547
+ },
2548
+ {
2549
+ "epoch": 45.9,
2550
+ "learning_rate": 6.558778625954198e-06,
2551
+ "loss": 1.1218,
2552
+ "step": 31900
2553
+ },
2554
+ {
2555
+ "epoch": 46.04,
2556
+ "learning_rate": 6.329770992366412e-06,
2557
+ "loss": 1.139,
2558
+ "step": 32000
2559
+ },
2560
+ {
2561
+ "epoch": 46.04,
2562
+ "eval_cer": 0.09683514407179972,
2563
+ "eval_loss": 0.2162453532218933,
2564
+ "eval_runtime": 41.1951,
2565
+ "eval_samples_per_second": 11.069,
2566
+ "eval_steps_per_second": 1.384,
2567
+ "eval_wer": 0.30066445182724255,
2568
+ "step": 32000
2569
+ },
2570
+ {
2571
+ "epoch": 46.19,
2572
+ "learning_rate": 6.100763358778626e-06,
2573
+ "loss": 1.1155,
2574
+ "step": 32100
2575
+ },
2576
+ {
2577
+ "epoch": 46.33,
2578
+ "learning_rate": 5.8717557251908395e-06,
2579
+ "loss": 1.1212,
2580
+ "step": 32200
2581
+ },
2582
+ {
2583
+ "epoch": 46.47,
2584
+ "learning_rate": 5.642748091603053e-06,
2585
+ "loss": 1.1149,
2586
+ "step": 32300
2587
+ },
2588
+ {
2589
+ "epoch": 46.62,
2590
+ "learning_rate": 5.413740458015267e-06,
2591
+ "loss": 1.1183,
2592
+ "step": 32400
2593
+ },
2594
+ {
2595
+ "epoch": 46.76,
2596
+ "learning_rate": 5.184732824427481e-06,
2597
+ "loss": 1.1114,
2598
+ "step": 32500
2599
+ },
2600
+ {
2601
+ "epoch": 46.76,
2602
+ "eval_cer": 0.09596913871831207,
2603
+ "eval_loss": 0.2121613770723343,
2604
+ "eval_runtime": 40.6982,
2605
+ "eval_samples_per_second": 11.204,
2606
+ "eval_steps_per_second": 1.401,
2607
+ "eval_wer": 0.2982482633645424,
2608
+ "step": 32500
2609
+ },
2610
+ {
2611
+ "epoch": 46.91,
2612
+ "learning_rate": 4.955725190839695e-06,
2613
+ "loss": 1.1091,
2614
+ "step": 32600
2615
+ },
2616
+ {
2617
+ "epoch": 47.05,
2618
+ "learning_rate": 4.726717557251908e-06,
2619
+ "loss": 1.1148,
2620
+ "step": 32700
2621
+ },
2622
+ {
2623
+ "epoch": 47.19,
2624
+ "learning_rate": 4.497709923664122e-06,
2625
+ "loss": 1.0962,
2626
+ "step": 32800
2627
+ },
2628
+ {
2629
+ "epoch": 47.34,
2630
+ "learning_rate": 4.268702290076335e-06,
2631
+ "loss": 1.0984,
2632
+ "step": 32900
2633
+ },
2634
+ {
2635
+ "epoch": 47.48,
2636
+ "learning_rate": 4.03969465648855e-06,
2637
+ "loss": 1.111,
2638
+ "step": 33000
2639
+ },
2640
+ {
2641
+ "epoch": 47.48,
2642
+ "eval_cer": 0.09482758620689655,
2643
+ "eval_loss": 0.21248506009578705,
2644
+ "eval_runtime": 40.6368,
2645
+ "eval_samples_per_second": 11.221,
2646
+ "eval_steps_per_second": 1.403,
2647
+ "eval_wer": 0.2946239806704923,
2648
+ "step": 33000
2649
+ },
2650
+ {
2651
+ "epoch": 47.63,
2652
+ "learning_rate": 3.810687022900763e-06,
2653
+ "loss": 1.1031,
2654
+ "step": 33100
2655
+ },
2656
+ {
2657
+ "epoch": 47.77,
2658
+ "learning_rate": 3.581679389312977e-06,
2659
+ "loss": 1.1159,
2660
+ "step": 33200
2661
+ },
2662
+ {
2663
+ "epoch": 47.91,
2664
+ "learning_rate": 3.352671755725191e-06,
2665
+ "loss": 1.0905,
2666
+ "step": 33300
2667
+ },
2668
+ {
2669
+ "epoch": 48.06,
2670
+ "learning_rate": 3.1236641221374048e-06,
2671
+ "loss": 1.1087,
2672
+ "step": 33400
2673
+ },
2674
+ {
2675
+ "epoch": 48.2,
2676
+ "learning_rate": 2.894656488549618e-06,
2677
+ "loss": 1.0982,
2678
+ "step": 33500
2679
+ },
2680
+ {
2681
+ "epoch": 48.2,
2682
+ "eval_cer": 0.09533931664304834,
2683
+ "eval_loss": 0.2098563313484192,
2684
+ "eval_runtime": 40.5946,
2685
+ "eval_samples_per_second": 11.233,
2686
+ "eval_steps_per_second": 1.404,
2687
+ "eval_wer": 0.2956810631229236,
2688
+ "step": 33500
2689
+ },
2690
+ {
2691
+ "epoch": 48.34,
2692
+ "learning_rate": 2.66793893129771e-06,
2693
+ "loss": 1.0947,
2694
+ "step": 33600
2695
+ },
2696
+ {
2697
+ "epoch": 48.49,
2698
+ "learning_rate": 2.4389312977099237e-06,
2699
+ "loss": 1.1102,
2700
+ "step": 33700
2701
+ },
2702
+ {
2703
+ "epoch": 48.63,
2704
+ "learning_rate": 2.209923664122137e-06,
2705
+ "loss": 1.0891,
2706
+ "step": 33800
2707
+ },
2708
+ {
2709
+ "epoch": 48.78,
2710
+ "learning_rate": 1.980916030534351e-06,
2711
+ "loss": 1.0937,
2712
+ "step": 33900
2713
+ },
2714
+ {
2715
+ "epoch": 48.92,
2716
+ "learning_rate": 1.7519083969465647e-06,
2717
+ "loss": 1.109,
2718
+ "step": 34000
2719
+ },
2720
+ {
2721
+ "epoch": 48.92,
2722
+ "eval_cer": 0.09545740828216029,
2723
+ "eval_loss": 0.20918497443199158,
2724
+ "eval_runtime": 40.877,
2725
+ "eval_samples_per_second": 11.155,
2726
+ "eval_steps_per_second": 1.394,
2727
+ "eval_wer": 0.29553005134400484,
2728
+ "step": 34000
2729
+ },
2730
+ {
2731
+ "epoch": 49.06,
2732
+ "learning_rate": 1.5229007633587786e-06,
2733
+ "loss": 1.097,
2734
+ "step": 34100
2735
+ },
2736
+ {
2737
+ "epoch": 49.21,
2738
+ "learning_rate": 1.2938931297709922e-06,
2739
+ "loss": 1.0909,
2740
+ "step": 34200
2741
+ },
2742
+ {
2743
+ "epoch": 49.35,
2744
+ "learning_rate": 1.0648854961832059e-06,
2745
+ "loss": 1.1008,
2746
+ "step": 34300
2747
+ },
2748
+ {
2749
+ "epoch": 49.5,
2750
+ "learning_rate": 8.358778625954198e-07,
2751
+ "loss": 1.0904,
2752
+ "step": 34400
2753
+ },
2754
+ {
2755
+ "epoch": 49.64,
2756
+ "learning_rate": 6.068702290076335e-07,
2757
+ "loss": 1.0905,
2758
+ "step": 34500
2759
+ },
2760
+ {
2761
+ "epoch": 49.64,
2762
+ "eval_cer": 0.09526058888364038,
2763
+ "eval_loss": 0.20883652567863464,
2764
+ "eval_runtime": 40.602,
2765
+ "eval_samples_per_second": 11.231,
2766
+ "eval_steps_per_second": 1.404,
2767
+ "eval_wer": 0.2953790395650861,
2768
+ "step": 34500
2769
+ },
2770
+ {
2771
+ "epoch": 49.78,
2772
+ "learning_rate": 3.778625954198473e-07,
2773
+ "loss": 1.0961,
2774
+ "step": 34600
2775
+ },
2776
+ {
2777
+ "epoch": 49.93,
2778
+ "learning_rate": 1.4885496183206107e-07,
2779
+ "loss": 1.095,
2780
+ "step": 34700
2781
+ },
2782
+ {
2783
+ "epoch": 50.0,
2784
+ "step": 34750,
2785
+ "total_flos": 2.8392187465644065e+20,
2786
+ "train_loss": 2.2316733406121783,
2787
+ "train_runtime": 114311.9751,
2788
+ "train_samples_per_second": 9.737,
2789
+ "train_steps_per_second": 0.304
2790
+ }
2791
+ ],
2792
+ "max_steps": 34750,
2793
+ "num_train_epochs": 50,
2794
+ "total_flos": 2.8392187465644065e+20,
2795
+ "trial_name": null,
2796
+ "trial_params": null
2797
+ }