arampacha committed
Commit 2d9955e
1 Parent(s): 740882f
README.md CHANGED
@@ -2,7 +2,7 @@
  license: apache-2.0
  tags:
  - automatic-speech-recognition
- - /workspace/data/hy/noizy_student_1/
+ - /workspace/data/hy/noizy_student_2/
  - generated_from_trainer
  model-index:
  - name: ''
@@ -14,11 +14,11 @@ should probably proofread and complete it, then remove this comment. -->

  #

- This model is a fine-tuned version of [facebook/wav2vec2-xls-r-1b](https://huggingface.co/facebook/wav2vec2-xls-r-1b) on the /WORKSPACE/DATA/HY/NOIZY_STUDENT_1/ - NA dataset.
+ This model is a fine-tuned version of [facebook/wav2vec2-xls-r-1b](https://huggingface.co/facebook/wav2vec2-xls-r-1b) on the /WORKSPACE/DATA/HY/NOIZY_STUDENT_2/ - NA dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.3061
- - Wer: 0.3899
- - Cer: 0.0747
+ - Loss: 0.2249
+ - Wer: 0.2783
+ - Cer: 0.0508

  ## Model description

@@ -40,31 +40,35 @@ The following hyperparameters were used during training:
  - learning_rate: 8e-05
  - train_batch_size: 16
  - eval_batch_size: 64
- - seed: 42
+ - seed: 842
  - gradient_accumulation_steps: 8
  - total_train_batch_size: 128
  - optimizer: Adam with betas=(0.9,0.98) and epsilon=1e-08
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_ratio: 0.1
- - training_steps: 1200
+ - training_steps: 1600
  - mixed_precision_training: Native AMP

  ### Training results

- | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
- |:-------------:|:------:|:----:|:---------------:|:------:|:------:|
- | 5.195 | 9.09 | 100 | 3.1338 | 1.0 | 1.0 |
- | 2.3769 | 18.18 | 200 | 0.4825 | 0.6616 | 0.1360 |
- | 1.345 | 27.26 | 300 | 0.3781 | 0.5113 | 0.1057 |
- | 1.2001 | 36.35 | 400 | 0.3571 | 0.4602 | 0.0931 |
- | 1.0484 | 45.44 | 500 | 0.3121 | 0.4094 | 0.0776 |
- | 0.926 | 54.53 | 600 | 0.3227 | 0.4094 | 0.0801 |
- | 0.8854 | 63.62 | 700 | 0.3061 | 0.3899 | 0.0747 |
- | 0.8054 | 72.7 | 800 | 0.3159 | 0.3891 | 0.0745 |
- | 0.7442 | 81.79 | 900 | 0.3136 | 0.3802 | 0.0731 |
- | 0.714 | 90.88 | 1000 | 0.3230 | 0.3716 | 0.0717 |
- | 0.6641 | 99.97 | 1100 | 0.3193 | 0.3610 | 0.0693 |
- | 0.6367 | 109.09 | 1200 | 0.3254 | 0.3587 | 0.0692 |
+ | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
+ |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|
+ | 4.9923 | 3.84 | 100 | 3.1562 | 1.0 | 1.0 |
+ | 2.1775 | 7.69 | 200 | 0.4334 | 0.5804 | 0.1122 |
+ | 1.3708 | 11.53 | 300 | 0.3106 | 0.4336 | 0.0797 |
+ | 1.2266 | 15.38 | 400 | 0.2675 | 0.3673 | 0.0673 |
+ | 1.093 | 19.23 | 500 | 0.2416 | 0.3501 | 0.0633 |
+ | 0.989 | 23.08 | 600 | 0.2320 | 0.3251 | 0.0611 |
+ | 0.9518 | 26.91 | 700 | 0.2413 | 0.3193 | 0.0584 |
+ | 0.9075 | 30.76 | 800 | 0.2354 | 0.3201 | 0.0593 |
+ | 0.878 | 34.61 | 900 | 0.2278 | 0.3126 | 0.0579 |
+ | 0.8563 | 38.46 | 1000 | 0.2327 | 0.2963 | 0.0548 |
+ | 0.8084 | 42.3 | 1100 | 0.2271 | 0.2923 | 0.0541 |
+ | 0.7845 | 46.15 | 1200 | 0.2333 | 0.2951 | 0.0537 |
+ | 0.7487 | 49.99 | 1300 | 0.2290 | 0.2888 | 0.0525 |
+ | 0.7182 | 53.84 | 1400 | 0.2341 | 0.2877 | 0.0535 |
+ | 0.7095 | 57.69 | 1500 | 0.2291 | 0.2818 | 0.0515 |
+ | 0.6953 | 61.53 | 1600 | 0.2249 | 0.2783 | 0.0508 |


  ### Framework versions
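Not part of the commit itself, but for orientation: a minimal inference sketch for the updated checkpoint. The repo id and audio path below are placeholders, and plain greedy (argmax) CTC decoding is shown rather than the LM-boosted decoding configured elsewhere in this commit.

```python
# Hypothetical usage sketch; repo id and audio path are placeholders.
import torch
import librosa
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

model_id = "arampacha/wav2vec2-xls-r-1b-hy"  # placeholder, not taken from the commit
model = Wav2Vec2ForCTC.from_pretrained(model_id)
processor = Wav2Vec2Processor.from_pretrained(model_id)

# preprocessor_config.json expects 16 kHz input
speech, _ = librosa.load("sample.wav", sr=16000)
inputs = processor(speech, sampling_rate=16000, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

pred_ids = torch.argmax(logits, dim=-1)
print(processor.batch_decode(pred_ids)[0])
```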
all_results.json CHANGED
@@ -1,15 +1,15 @@
  {
- "epoch": 109.09,
- "eval_cer": 0.07465857359635812,
- "eval_loss": 0.3061184883117676,
- "eval_runtime": 15.8663,
+ "epoch": 61.53,
+ "eval_cer": 0.05078401618614062,
+ "eval_loss": 0.2248678207397461,
+ "eval_runtime": 15.1655,
  "eval_samples": 335,
- "eval_samples_per_second": 21.114,
- "eval_steps_per_second": 0.378,
- "eval_wer": 0.38992974238875877,
- "train_loss": 1.3784224351247152,
- "train_runtime": 12041.2604,
- "train_samples": 1456,
- "train_samples_per_second": 12.756,
- "train_steps_per_second": 0.1
+ "eval_samples_per_second": 22.09,
+ "eval_steps_per_second": 0.396,
+ "eval_wer": 0.2782982045277127,
+ "train_loss": 1.2442097234725953,
+ "train_runtime": 17605.3989,
+ "train_samples": 3354,
+ "train_samples_per_second": 11.633,
+ "train_steps_per_second": 0.091
  }
alphabet.json ADDED
@@ -0,0 +1 @@
+ {"labels": [" ", "\u0561", "\u0562", "\u0563", "\u0564", "\u0565", "\u0566", "\u0567", "\u0568", "\u0569", "\u056a", "\u056b", "\u056c", "\u056d", "\u056e", "\u056f", "\u0570", "\u0571", "\u0572", "\u0573", "\u0574", "\u0575", "\u0576", "\u0577", "\u0578", "\u0579", "\u057a", "\u057b", "\u057c", "\u057d", "\u057e", "\u057f", "\u0580", "\u0581", "\u0582", "\u0583", "\u0584", "\u0585", "\u0586", "\u0587", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
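The added alphabet.json is the label set consumed by pyctcdecode. A sketch of how such a label list is typically turned into a CTC beam-search decoder; the KenLM path is a placeholder, since no n-gram model appears in this diff:

```python
# Sketch only: build a pyctcdecode decoder from a labels list like alphabet.json.
import json
from pyctcdecode import build_ctcdecoder

with open("alphabet.json") as f:
    labels = json.load(f)["labels"]

decoder = build_ctcdecoder(
    labels,
    kenlm_model_path="lm/5gram_hy.arpa",  # placeholder language-model path
)

# Given CTC log-probabilities of shape (time, vocab) for one utterance:
# transcription = decoder.decode(logits)
```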
eval_results.json CHANGED
@@ -1,10 +1,10 @@
  {
- "epoch": 109.09,
- "eval_cer": 0.07465857359635812,
- "eval_loss": 0.3061184883117676,
- "eval_runtime": 15.8663,
+ "epoch": 61.53,
+ "eval_cer": 0.05078401618614062,
+ "eval_loss": 0.2248678207397461,
+ "eval_runtime": 15.1655,
  "eval_samples": 335,
- "eval_samples_per_second": 21.114,
- "eval_steps_per_second": 0.378,
- "eval_wer": 0.38992974238875877
+ "eval_samples_per_second": 22.09,
+ "eval_steps_per_second": 0.396,
+ "eval_wer": 0.2782982045277127
  }
mozilla-foundation_common_voice_8_0_hy-AM_test_eval_results.txt ADDED
@@ -0,0 +1,2 @@
+ WER: 0.12724434035909446
+ CER: 0.02716236722306525
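These two numbers come from an evaluation on the mozilla-foundation/common_voice_8_0 hy-AM test split. A rough sketch of how such figures can be reproduced is below; this is not the exact script behind this file, and the text normalization, the working directory ("./"), and the presence of the LM files are assumptions.

```python
# Rough sketch: WER/CER on the Common Voice 8.0 hy-AM test split.
import torch
import evaluate
from datasets import Audio, load_dataset
from transformers import AutoModelForCTC, AutoProcessor

model = AutoModelForCTC.from_pretrained("./")          # assumes repo root as cwd
processor = AutoProcessor.from_pretrained("./")        # Wav2Vec2ProcessorWithLM per preprocessor_config.json
wer_metric, cer_metric = evaluate.load("wer"), evaluate.load("cer")

ds = load_dataset("mozilla-foundation/common_voice_8_0", "hy-AM", split="test")
ds = ds.cast_column("audio", Audio(sampling_rate=16_000))

predictions, references = [], []
for sample in ds:
    inputs = processor(sample["audio"]["array"], sampling_rate=16_000, return_tensors="pt")
    with torch.no_grad():
        logits = model(input_values=inputs.input_values).logits
    predictions.append(processor.batch_decode(logits.numpy()).text[0].lower())
    references.append(sample["sentence"].lower())

print("WER:", wer_metric.compute(predictions=predictions, references=references))
print("CER:", cer_metric.compute(predictions=predictions, references=references))
```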
preprocessor_config.json CHANGED
@@ -5,5 +5,6 @@
  "padding_side": "right",
  "padding_value": 0,
  "return_attention_mask": true,
- "sampling_rate": 16000
+ "sampling_rate": 16000,
+ "processor_class": "Wav2Vec2ProcessorWithLM"
  }
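The new processor_class key tells AutoProcessor which wrapper to instantiate. A minimal sketch of the effect, assuming the pyctcdecode/KenLM artifacts that Wav2Vec2ProcessorWithLM needs are also present in the repo:

```python
# Sketch: AutoProcessor now resolves to the LM-boosted processor, whose
# batch_decode runs beam search over the logits instead of a plain argmax.
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("./")  # -> Wav2Vec2ProcessorWithLM
# For logits of shape (batch, time, vocab):
# transcription = processor.batch_decode(logits.numpy()).text
```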
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:32516ce95b5883aab22adf9ea26a65e31f5fecfa4cb664058ac379897433753e
+ oid sha256:dbfcbb3e2a2fa0ab89db84e59692c3f3bee66d7cee3154a41e036d1300fac796
  size 3850538161
run.sh CHANGED
@@ -1,10 +1,10 @@
  python run_speech_recognition_ctc.py \
- --dataset_name="/workspace/data/hy/noizy_student_1/" \
+ --dataset_name="/workspace/data/hy/noizy_student_2/" \
  --train_split_name train \
  --model_name_or_path="facebook/wav2vec2-xls-r-1b" \
  --output_dir="./" \
- --overwrite_output_dir \
- --max_steps 1200 \
+ --overwrite_output_dir \
+ --max_steps 1600 \
  --per_device_train_batch_size="16" \
  --per_device_eval_batch_size="64" \
  --gradient_accumulation_steps="8" \
@@ -19,7 +19,7 @@ python run_speech_recognition_ctc.py \
  --save_steps="100" \
  --eval_steps="100" \
  --logging_steps="100" \
- --save_total_limit="2" \
+ --save_total_limit="4" \
  --freeze_feature_encoder \
  --layerdrop="0.1" \
  --activation_dropout="0.1" \
@@ -35,7 +35,7 @@ python run_speech_recognition_ctc.py \
  --do_train --do_eval \
  --load_best_model_at_end \
  --report_to all \
- --run_name="xlsr-hy-ns-1b-1" \
+ --run_name="xlsr-hy-ns-1b-2" \
  --wandb_project="xlsr-hy" \
  --seed 842 \
- --bnb --tristage_sched
+ --bnb --tristage_sched
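One relationship worth keeping in mind when reading these flags: the total_train_batch_size of 128 reported in the README follows from the per-device batch size and gradient accumulation, assuming a single GPU, as sketched below.

```python
# Sketch: how total_train_batch_size = 128 follows from the run.sh flags.
per_device_train_batch_size = 16
gradient_accumulation_steps = 8
num_devices = 1  # assumption; not stated in the script itself
total_train_batch_size = per_device_train_batch_size * gradient_accumulation_steps * num_devices
assert total_train_batch_size == 128
```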
runs/Jan31_15-50-28_job-b1f4681b-d20d-47f2-af64-0c1734f4ff64/1643644281.5404139/events.out.tfevents.1643644281.job-b1f4681b-d20d-47f2-af64-0c1734f4ff64.56684.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76fcd03460e4b5ce4f010a3769fe861f72b2bfadbfd589764e455a3ba3c3a71a
+ size 4772
runs/Jan31_15-50-28_job-b1f4681b-d20d-47f2-af64-0c1734f4ff64/events.out.tfevents.1643644281.job-b1f4681b-d20d-47f2-af64-0c1734f4ff64.56684.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c24de85b4f3f4f325b767da1a58e5f175ba48b2220a68fc768c76cd0b46f4b6
+ size 13399
runs/Jan31_15-50-28_job-b1f4681b-d20d-47f2-af64-0c1734f4ff64/events.out.tfevents.1643661906.job-b1f4681b-d20d-47f2-af64-0c1734f4ff64.56684.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99e5f512d6ead4b4623f9f64409214136694f1205519849e3106891d5f63c8f4
+ size 405
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
- "epoch": 109.09,
- "train_loss": 1.3784224351247152,
- "train_runtime": 12041.2604,
- "train_samples": 1456,
- "train_samples_per_second": 12.756,
- "train_steps_per_second": 0.1
+ "epoch": 61.53,
+ "train_loss": 1.2442097234725953,
+ "train_runtime": 17605.3989,
+ "train_samples": 3354,
+ "train_samples_per_second": 11.633,
+ "train_steps_per_second": 0.091
  }
trainer_state.json CHANGED
@@ -1,217 +1,281 @@
  {
- "best_metric": 0.3061184883117676,
- "best_model_checkpoint": "./checkpoint-700",
- "epoch": 109.08791208791209,
- "global_step": 1200,
+ "best_metric": 0.2248678207397461,
+ "best_model_checkpoint": "./checkpoint-1600",
+ "epoch": 61.53333333333333,
+ "global_step": 1600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 9.09,
- "learning_rate": 6.548e-05,
- "loss": 5.195,
+ "epoch": 3.84,
+ "learning_rate": 4.980500000000001e-05,
+ "loss": 4.9923,
  "step": 100
  },
  {
- "epoch": 9.09,
+ "epoch": 3.84,
  "eval_cer": 1.0,
- "eval_loss": 3.133816719055176,
- "eval_runtime": 15.8007,
- "eval_samples_per_second": 21.202,
- "eval_steps_per_second": 0.38,
+ "eval_loss": 3.156188488006592,
+ "eval_runtime": 15.7049,
+ "eval_samples_per_second": 21.331,
+ "eval_steps_per_second": 0.382,
  "eval_wer": 1.0,
  "step": 100
  },
  {
- "epoch": 18.18,
+ "epoch": 7.69,
  "learning_rate": 8e-05,
- "loss": 2.3769,
+ "loss": 2.1775,
  "step": 200
  },
  {
- "epoch": 18.18,
- "eval_cer": 0.13601416287303997,
- "eval_loss": 0.48250746726989746,
- "eval_runtime": 15.2805,
- "eval_samples_per_second": 21.923,
+ "epoch": 7.69,
+ "eval_cer": 0.11219018715225089,
+ "eval_loss": 0.43336454033851624,
+ "eval_runtime": 15.2789,
+ "eval_samples_per_second": 21.926,
  "eval_steps_per_second": 0.393,
- "eval_wer": 0.6615925058548009,
+ "eval_wer": 0.5804059328649492,
  "step": 200
  },
  {
- "epoch": 27.26,
+ "epoch": 11.53,
  "learning_rate": 8e-05,
- "loss": 1.345,
+ "loss": 1.3708,
  "step": 300
  },
  {
- "epoch": 27.26,
- "eval_cer": 0.10571573090541224,
- "eval_loss": 0.3780955374240875,
- "eval_runtime": 15.0829,
- "eval_samples_per_second": 22.211,
- "eval_steps_per_second": 0.398,
- "eval_wer": 0.5113192818110851,
+ "epoch": 11.53,
+ "eval_cer": 0.07966616084977238,
+ "eval_loss": 0.3105751574039459,
+ "eval_runtime": 15.048,
+ "eval_samples_per_second": 22.262,
+ "eval_steps_per_second": 0.399,
+ "eval_wer": 0.4336455893832943,
  "step": 300
  },
  {
- "epoch": 36.35,
+ "epoch": 15.38,
  "learning_rate": 8e-05,
- "loss": 1.2001,
+ "loss": 1.2266,
  "step": 400
  },
  {
- "epoch": 36.35,
- "eval_cer": 0.09312089023773394,
- "eval_loss": 0.3571384847164154,
- "eval_runtime": 15.2304,
- "eval_samples_per_second": 21.996,
+ "epoch": 15.38,
+ "eval_cer": 0.06732422862923622,
+ "eval_loss": 0.26751142740249634,
+ "eval_runtime": 15.232,
+ "eval_samples_per_second": 21.993,
  "eval_steps_per_second": 0.394,
- "eval_wer": 0.4601873536299766,
+ "eval_wer": 0.3672911787665886,
  "step": 400
  },
  {
- "epoch": 45.44,
+ "epoch": 19.23,
  "learning_rate": 8e-05,
- "loss": 1.0484,
+ "loss": 1.093,
  "step": 500
  },
  {
- "epoch": 45.44,
- "eval_cer": 0.07764289327263531,
- "eval_loss": 0.3121073842048645,
- "eval_runtime": 15.2202,
- "eval_samples_per_second": 22.01,
- "eval_steps_per_second": 0.394,
- "eval_wer": 0.4094457455113193,
+ "epoch": 19.23,
+ "eval_cer": 0.06327769347496207,
+ "eval_loss": 0.24162611365318298,
+ "eval_runtime": 14.995,
+ "eval_samples_per_second": 22.341,
+ "eval_steps_per_second": 0.4,
+ "eval_wer": 0.35011709601873536,
  "step": 500
  },
  {
- "epoch": 54.53,
+ "epoch": 23.08,
  "learning_rate": 8e-05,
- "loss": 0.926,
+ "loss": 0.989,
  "step": 600
  },
  {
- "epoch": 54.53,
- "eval_cer": 0.08012139605462823,
- "eval_loss": 0.3227134943008423,
- "eval_runtime": 15.3003,
- "eval_samples_per_second": 21.895,
- "eval_steps_per_second": 0.392,
- "eval_wer": 0.4094457455113193,
+ "epoch": 23.08,
+ "eval_cer": 0.06105209914011128,
+ "eval_loss": 0.23200440406799316,
+ "eval_runtime": 15.3525,
+ "eval_samples_per_second": 21.821,
+ "eval_steps_per_second": 0.391,
+ "eval_wer": 0.3251366120218579,
  "step": 600
  },
  {
- "epoch": 63.62,
- "learning_rate": 6.758666666666667e-05,
- "loss": 0.8854,
+ "epoch": 26.91,
+ "learning_rate": 8e-05,
+ "loss": 0.9518,
  "step": 700
  },
  {
- "epoch": 63.62,
- "eval_cer": 0.07465857359635812,
- "eval_loss": 0.3061184883117676,
- "eval_runtime": 17.2128,
- "eval_samples_per_second": 19.462,
- "eval_steps_per_second": 0.349,
- "eval_wer": 0.38992974238875877,
+ "epoch": 26.91,
+ "eval_cer": 0.05842185128983308,
+ "eval_loss": 0.2413272261619568,
+ "eval_runtime": 15.2385,
+ "eval_samples_per_second": 21.984,
+ "eval_steps_per_second": 0.394,
+ "eval_wer": 0.3192818110850898,
  "step": 700
  },
  {
- "epoch": 72.7,
- "learning_rate": 5.492e-05,
- "loss": 0.8054,
+ "epoch": 30.76,
+ "learning_rate": 8e-05,
+ "loss": 0.9075,
  "step": 800
  },
  {
- "epoch": 72.7,
- "eval_cer": 0.07445624683864441,
- "eval_loss": 0.315933495759964,
- "eval_runtime": 17.3337,
- "eval_samples_per_second": 19.327,
- "eval_steps_per_second": 0.346,
- "eval_wer": 0.38914910226385635,
+ "epoch": 30.76,
+ "eval_cer": 0.05933232169954476,
+ "eval_loss": 0.23544833064079285,
+ "eval_runtime": 15.1938,
+ "eval_samples_per_second": 22.049,
+ "eval_steps_per_second": 0.395,
+ "eval_wer": 0.3200624512099922,
  "step": 800
  },
  {
- "epoch": 81.79,
- "learning_rate": 4.225333333333334e-05,
- "loss": 0.7442,
+ "epoch": 34.61,
+ "learning_rate": 7.059500000000001e-05,
+ "loss": 0.878,
  "step": 900
  },
  {
- "epoch": 81.79,
- "eval_cer": 0.07309054122407689,
- "eval_loss": 0.3135768473148346,
- "eval_runtime": 15.2189,
- "eval_samples_per_second": 22.012,
- "eval_steps_per_second": 0.394,
- "eval_wer": 0.3801717408274785,
+ "epoch": 34.61,
+ "eval_cer": 0.057916034395548814,
+ "eval_loss": 0.22777308523654938,
+ "eval_runtime": 14.9728,
+ "eval_samples_per_second": 22.374,
+ "eval_steps_per_second": 0.401,
+ "eval_wer": 0.3126463700234192,
  "step": 900
  },
  {
- "epoch": 90.88,
- "learning_rate": 2.958666666666667e-05,
- "loss": 0.714,
+ "epoch": 38.46,
+ "learning_rate": 6.109500000000001e-05,
+ "loss": 0.8563,
  "step": 1000
  },
  {
- "epoch": 90.88,
- "eval_cer": 0.07172483560950936,
- "eval_loss": 0.32300877571105957,
- "eval_runtime": 15.24,
- "eval_samples_per_second": 21.982,
- "eval_steps_per_second": 0.394,
- "eval_wer": 0.37158469945355194,
+ "epoch": 38.46,
+ "eval_cer": 0.054779969650986346,
+ "eval_loss": 0.2326740324497223,
+ "eval_runtime": 15.1749,
+ "eval_samples_per_second": 22.076,
+ "eval_steps_per_second": 0.395,
+ "eval_wer": 0.2962529274004684,
  "step": 1000
  },
  {
- "epoch": 99.97,
- "learning_rate": 1.6920000000000004e-05,
- "loss": 0.6641,
+ "epoch": 42.3,
+ "learning_rate": 5.169000000000001e-05,
+ "loss": 0.8084,
  "step": 1100
  },
  {
- "epoch": 99.97,
- "eval_cer": 0.06934749620637329,
- "eval_loss": 0.31931421160697937,
- "eval_runtime": 15.1822,
- "eval_samples_per_second": 22.065,
- "eval_steps_per_second": 0.395,
- "eval_wer": 0.36104605776736926,
+ "epoch": 42.3,
+ "eval_cer": 0.05407182599898837,
+ "eval_loss": 0.22712552547454834,
+ "eval_runtime": 15.3083,
+ "eval_samples_per_second": 21.884,
+ "eval_steps_per_second": 0.392,
+ "eval_wer": 0.2923497267759563,
  "step": 1100
  },
  {
- "epoch": 109.09,
- "learning_rate": 4.253333333333336e-06,
- "loss": 0.6367,
+ "epoch": 46.15,
+ "learning_rate": 4.219000000000001e-05,
+ "loss": 0.7845,
  "step": 1200
  },
  {
- "epoch": 109.09,
- "eval_cer": 0.06924633282751644,
- "eval_loss": 0.32542118430137634,
- "eval_runtime": 15.2075,
- "eval_samples_per_second": 22.029,
- "eval_steps_per_second": 0.395,
- "eval_wer": 0.358704137392662,
+ "epoch": 46.15,
+ "eval_cer": 0.053667172483560954,
+ "eval_loss": 0.23326420783996582,
+ "eval_runtime": 15.1559,
+ "eval_samples_per_second": 22.104,
+ "eval_steps_per_second": 0.396,
+ "eval_wer": 0.29508196721311475,
  "step": 1200
  },
  {
- "epoch": 109.09,
- "step": 1200,
- "total_flos": 9.090968694813691e+19,
- "train_loss": 1.3784224351247152,
- "train_runtime": 12041.2604,
- "train_samples_per_second": 12.756,
- "train_steps_per_second": 0.1
+ "epoch": 49.99,
+ "learning_rate": 3.269000000000001e-05,
+ "loss": 0.7487,
+ "step": 1300
+ },
+ {
+ "epoch": 49.99,
+ "eval_cer": 0.052453211937278706,
+ "eval_loss": 0.22895006835460663,
+ "eval_runtime": 15.373,
+ "eval_samples_per_second": 21.791,
+ "eval_steps_per_second": 0.39,
+ "eval_wer": 0.2888368462138954,
+ "step": 1300
+ },
+ {
+ "epoch": 53.84,
+ "learning_rate": 2.319e-05,
+ "loss": 0.7182,
+ "step": 1400
+ },
+ {
+ "epoch": 53.84,
+ "eval_cer": 0.05346484572584724,
+ "eval_loss": 0.23406584560871124,
+ "eval_runtime": 15.2056,
+ "eval_samples_per_second": 22.031,
+ "eval_steps_per_second": 0.395,
+ "eval_wer": 0.28766588602654175,
+ "step": 1400
+ },
+ {
+ "epoch": 57.69,
+ "learning_rate": 1.369e-05,
+ "loss": 0.7095,
+ "step": 1500
+ },
+ {
+ "epoch": 57.69,
+ "eval_cer": 0.05154274152756702,
+ "eval_loss": 0.22908572852611542,
+ "eval_runtime": 15.2684,
+ "eval_samples_per_second": 21.941,
+ "eval_steps_per_second": 0.393,
+ "eval_wer": 0.2818110850897736,
+ "step": 1500
+ },
+ {
+ "epoch": 61.53,
+ "learning_rate": 4.190000000000005e-06,
+ "loss": 0.6953,
+ "step": 1600
+ },
+ {
+ "epoch": 61.53,
+ "eval_cer": 0.05078401618614062,
+ "eval_loss": 0.2248678207397461,
+ "eval_runtime": 15.2139,
+ "eval_samples_per_second": 22.019,
+ "eval_steps_per_second": 0.394,
+ "eval_wer": 0.2782982045277127,
+ "step": 1600
+ },
+ {
+ "epoch": 61.53,
+ "step": 1600,
+ "total_flos": 1.3126730002882698e+20,
+ "train_loss": 1.2442097234725953,
+ "train_runtime": 17605.3989,
+ "train_samples_per_second": 11.633,
+ "train_steps_per_second": 0.091
  }
  ],
- "max_steps": 1200,
- "num_train_epochs": 110,
- "total_flos": 9.090968694813691e+19,
+ "max_steps": 1600,
+ "num_train_epochs": 62,
+ "total_flos": 1.3126730002882698e+20,
  "trial_name": null,
  "trial_params": null
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9886d1fbbad75a820f26ae7488ec03668e4ac81f646bc9c64621c5a4caedeba1
+ oid sha256:c934f0ed6fa80bfe4f2228c9550d942f9d713597358d2f57a1375b6454c2d03d
  size 3055