tuananh7198 committed on
Commit
292eac2
1 Parent(s): 6d8a4d1

Training in progress, step 1000

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 4.01,
3
  "eval_loss": 0.5422283411026001,
4
  "eval_runtime": 892.2169,
5
  "eval_samples_per_second": 1.386,
6
  "eval_steps_per_second": 0.087,
7
  "eval_wer": 20.04825619653433,
8
- "train_loss": 0.19602714616060257,
9
- "train_runtime": 17744.1511,
10
- "train_samples_per_second": 0.902,
11
- "train_steps_per_second": 0.056
12
  }
 
1
  {
2
+ "epoch": 1.0,
3
  "eval_loss": 0.5422283411026001,
4
  "eval_runtime": 892.2169,
5
  "eval_samples_per_second": 1.386,
6
  "eval_steps_per_second": 0.087,
7
  "eval_wer": 20.04825619653433,
8
+ "train_loss": 0.7568578720092773,
9
+ "train_runtime": 77.245,
10
+ "train_samples_per_second": 0.104,
11
+ "train_steps_per_second": 0.013
12
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "openai/whisper-large",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "architectures": [
@@ -11,17 +11,17 @@
11
  50257
12
  ],
13
  "bos_token_id": 50257,
14
- "d_model": 1280,
15
- "decoder_attention_heads": 20,
16
- "decoder_ffn_dim": 5120,
17
  "decoder_layerdrop": 0.0,
18
- "decoder_layers": 32,
19
  "decoder_start_token_id": 50258,
20
  "dropout": 0.0,
21
- "encoder_attention_heads": 20,
22
- "encoder_ffn_dim": 5120,
23
  "encoder_layerdrop": 0.0,
24
- "encoder_layers": 32,
25
  "eos_token_id": 50257,
26
  "forced_decoder_ids": null,
27
  "init_std": 0.02,
@@ -30,7 +30,7 @@
30
  "max_source_positions": 1500,
31
  "max_target_positions": 448,
32
  "model_type": "whisper",
33
- "num_hidden_layers": 32,
34
  "num_mel_bins": 80,
35
  "pad_token_id": 50257,
36
  "scale_embedding": false,
 
1
  {
2
+ "_name_or_path": "openai/whisper-medium",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "architectures": [
 
11
  50257
12
  ],
13
  "bos_token_id": 50257,
14
+ "d_model": 1024,
15
+ "decoder_attention_heads": 16,
16
+ "decoder_ffn_dim": 4096,
17
  "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 24,
19
  "decoder_start_token_id": 50258,
20
  "dropout": 0.0,
21
+ "encoder_attention_heads": 16,
22
+ "encoder_ffn_dim": 4096,
23
  "encoder_layerdrop": 0.0,
24
+ "encoder_layers": 24,
25
  "eos_token_id": 50257,
26
  "forced_decoder_ids": null,
27
  "init_std": 0.02,
 
30
  "max_source_positions": 1500,
31
  "max_target_positions": 448,
32
  "model_type": "whisper",
33
+ "num_hidden_layers": 24,
34
  "num_mel_bins": 80,
35
  "pad_token_id": 50257,
36
  "scale_embedding": false,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b171162e0f6c3c123f162425aaf1faa210d63e93692be3832d6eb1f2da952b1
3
- size 6173647530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e01ec9eccb75a5c62b8326712df5945b6f8bf00f6fe6486fc6d4972a986c62a0
3
+ size 3055748571
run.sh CHANGED
@@ -1,15 +1,15 @@
1
  python run_speech_recognition_seq2seq_streaming.py \
2
- --model_name_or_path="openai/whisper-large" \
3
  --dataset_name="mozilla-foundation/common_voice_11_0" \
4
  --dataset_config_name="vi" \
5
  --language="vi" \
6
  --train_split_name="train+validation" \
7
  --eval_split_name="test" \
8
  --model_index_name="Whisper Medium Vietnamese" \
9
- --max_steps="1" \
10
  --output_dir="./" \
11
- --per_device_train_batch_size="1" \
12
- --per_device_eval_batch_size="1" \
13
  --logging_steps="25" \
14
  --learning_rate="1e-5" \
15
  --warmup_steps="100" \
@@ -35,4 +35,3 @@ python run_speech_recognition_seq2seq_streaming.py \
35
  --streaming \
36
  --use_auth_token \
37
  --push_to_hub \
38
- --optim="adafactor"
 
1
  python run_speech_recognition_seq2seq_streaming.py \
2
+ --model_name_or_path="openai/whisper-medium" \
3
  --dataset_name="mozilla-foundation/common_voice_11_0" \
4
  --dataset_config_name="vi" \
5
  --language="vi" \
6
  --train_split_name="train+validation" \
7
  --eval_split_name="test" \
8
  --model_index_name="Whisper Medium Vietnamese" \
9
+ --max_steps="1000" \
10
  --output_dir="./" \
11
+ --per_device_train_batch_size="2" \
12
+ --per_device_eval_batch_size="2" \
13
  --logging_steps="25" \
14
  --learning_rate="1e-5" \
15
  --warmup_steps="100" \
 
35
  --streaming \
36
  --use_auth_token \
37
  --push_to_hub \
 
runs/Dec16_22-51-47_ip-172-16-16-209.ec2.internal/1671231123.5662413/events.out.tfevents.1671231123.ip-172-16-16-209.ec2.internal.29304.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a64b73ab7fb71819df4f35da4f82b57f8d4a401bdd4ac3a4b12ce9f8068dae34
3
+ size 5886
runs/Dec16_22-51-47_ip-172-16-16-209.ec2.internal/events.out.tfevents.1671231123.ip-172-16-16-209.ec2.internal.29304.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f84ebd1ca04495b54fbeed7c6c8871e5430ea4d8d352d2b89597f80d88da48
3
+ size 4283
runs/Dec16_22-54-22_ip-172-16-16-209.ec2.internal/1671231277.9215746/events.out.tfevents.1671231277.ip-172-16-16-209.ec2.internal.32089.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed0867f23c0db7d7d49f11313e6c24756f8979d043b96b80c81d480be49c357d
3
+ size 5885
runs/Dec16_22-54-22_ip-172-16-16-209.ec2.internal/events.out.tfevents.1671231277.ip-172-16-16-209.ec2.internal.32089.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:984a6d26d632c0cd1f8b1c47f8108c272f2be5c6e42535c0bf1fbe4ff12261cc
3
+ size 10865
tokenizer_config.json CHANGED
@@ -19,7 +19,7 @@
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
22
- "name_or_path": "openai/whisper-large",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
 
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
22
+ "name_or_path": "openai/whisper-medium",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 4.01,
3
- "train_loss": 0.19602714616060257,
4
- "train_runtime": 17744.1511,
5
- "train_samples_per_second": 0.902,
6
- "train_steps_per_second": 0.056
7
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.7568578720092773,
4
+ "train_runtime": 77.245,
5
+ "train_samples_per_second": 0.104,
6
+ "train_steps_per_second": 0.013
7
  }
trainer_state.json CHANGED
@@ -1,274 +1,25 @@
1
  {
2
- "best_metric": 20.04825619653433,
3
- "best_model_checkpoint": "./checkpoint-1000",
4
- "epoch": 4.008,
5
- "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.03,
12
- "learning_rate": 2.1000000000000002e-06,
13
- "loss": 1.7151,
14
- "step": 25
15
- },
16
- {
17
- "epoch": 0.05,
18
- "learning_rate": 4.600000000000001e-06,
19
- "loss": 0.7627,
20
- "step": 50
21
- },
22
- {
23
- "epoch": 0.07,
24
- "learning_rate": 7.100000000000001e-06,
25
- "loss": 0.3638,
26
- "step": 75
27
- },
28
- {
29
- "epoch": 0.1,
30
- "learning_rate": 9.600000000000001e-06,
31
- "loss": 0.3364,
32
- "step": 100
33
- },
34
- {
35
- "epoch": 0.12,
36
- "learning_rate": 9.766666666666667e-06,
37
- "loss": 0.3486,
38
- "step": 125
39
- },
40
- {
41
- "epoch": 0.15,
42
- "learning_rate": 9.48888888888889e-06,
43
- "loss": 0.3894,
44
- "step": 150
45
- },
46
- {
47
- "epoch": 0.17,
48
- "learning_rate": 9.211111111111111e-06,
49
- "loss": 0.4697,
50
- "step": 175
51
- },
52
- {
53
- "epoch": 0.2,
54
- "learning_rate": 8.933333333333333e-06,
55
- "loss": 0.3761,
56
- "step": 200
57
- },
58
- {
59
- "epoch": 0.23,
60
- "learning_rate": 8.655555555555557e-06,
61
- "loss": 0.3839,
62
- "step": 225
63
- },
64
  {
65
  "epoch": 1.0,
66
- "learning_rate": 8.377777777777779e-06,
67
- "loss": 0.3711,
68
- "step": 250
69
- },
70
- {
71
- "epoch": 1.03,
72
- "learning_rate": 8.1e-06,
73
- "loss": 0.1781,
74
- "step": 275
75
- },
76
- {
77
- "epoch": 1.05,
78
- "learning_rate": 7.822222222222224e-06,
79
- "loss": 0.1591,
80
- "step": 300
81
- },
82
- {
83
- "epoch": 1.08,
84
- "learning_rate": 7.544444444444445e-06,
85
- "loss": 0.1354,
86
- "step": 325
87
- },
88
- {
89
- "epoch": 1.1,
90
- "learning_rate": 7.266666666666668e-06,
91
- "loss": 0.1823,
92
- "step": 350
93
- },
94
- {
95
- "epoch": 1.13,
96
- "learning_rate": 6.9888888888888895e-06,
97
- "loss": 0.1651,
98
- "step": 375
99
- },
100
- {
101
- "epoch": 1.15,
102
- "learning_rate": 6.711111111111111e-06,
103
- "loss": 0.1603,
104
- "step": 400
105
- },
106
- {
107
- "epoch": 1.18,
108
- "learning_rate": 6.433333333333333e-06,
109
- "loss": 0.1426,
110
- "step": 425
111
- },
112
- {
113
- "epoch": 1.2,
114
- "learning_rate": 6.155555555555556e-06,
115
- "loss": 0.178,
116
- "step": 450
117
- },
118
- {
119
- "epoch": 1.23,
120
- "learning_rate": 5.877777777777778e-06,
121
- "loss": 0.1491,
122
- "step": 475
123
- },
124
- {
125
- "epoch": 2.0,
126
- "learning_rate": 5.600000000000001e-06,
127
- "loss": 0.0891,
128
- "step": 500
129
- },
130
- {
131
- "epoch": 2.03,
132
- "learning_rate": 5.322222222222223e-06,
133
- "loss": 0.0439,
134
- "step": 525
135
- },
136
- {
137
- "epoch": 2.05,
138
- "learning_rate": 5.044444444444445e-06,
139
- "loss": 0.0815,
140
- "step": 550
141
- },
142
- {
143
- "epoch": 2.08,
144
- "learning_rate": 4.766666666666667e-06,
145
- "loss": 0.0708,
146
- "step": 575
147
- },
148
- {
149
- "epoch": 2.1,
150
- "learning_rate": 4.488888888888889e-06,
151
- "loss": 0.0662,
152
- "step": 600
153
- },
154
- {
155
- "epoch": 2.13,
156
- "learning_rate": 4.211111111111112e-06,
157
- "loss": 0.076,
158
- "step": 625
159
- },
160
- {
161
- "epoch": 2.15,
162
- "learning_rate": 3.9333333333333335e-06,
163
- "loss": 0.0632,
164
- "step": 650
165
- },
166
- {
167
- "epoch": 2.18,
168
- "learning_rate": 3.6555555555555562e-06,
169
- "loss": 0.0461,
170
- "step": 675
171
- },
172
- {
173
- "epoch": 2.2,
174
- "learning_rate": 3.377777777777778e-06,
175
- "loss": 0.0826,
176
- "step": 700
177
- },
178
- {
179
- "epoch": 2.23,
180
- "learning_rate": 3.1000000000000004e-06,
181
- "loss": 0.0674,
182
- "step": 725
183
- },
184
- {
185
- "epoch": 3.01,
186
- "learning_rate": 2.8222222222222223e-06,
187
- "loss": 0.0247,
188
- "step": 750
189
- },
190
- {
191
- "epoch": 3.03,
192
- "learning_rate": 2.5444444444444446e-06,
193
- "loss": 0.0147,
194
- "step": 775
195
- },
196
- {
197
- "epoch": 3.06,
198
- "learning_rate": 2.266666666666667e-06,
199
- "loss": 0.0141,
200
- "step": 800
201
- },
202
- {
203
- "epoch": 3.08,
204
- "learning_rate": 1.988888888888889e-06,
205
- "loss": 0.0255,
206
- "step": 825
207
- },
208
- {
209
- "epoch": 3.11,
210
- "learning_rate": 1.7111111111111112e-06,
211
- "loss": 0.011,
212
- "step": 850
213
- },
214
- {
215
- "epoch": 3.13,
216
- "learning_rate": 1.4333333333333335e-06,
217
- "loss": 0.0099,
218
- "step": 875
219
- },
220
- {
221
- "epoch": 3.16,
222
- "learning_rate": 1.1555555555555556e-06,
223
- "loss": 0.013,
224
- "step": 900
225
- },
226
- {
227
- "epoch": 3.18,
228
- "learning_rate": 8.777777777777778e-07,
229
- "loss": 0.0213,
230
- "step": 925
231
- },
232
- {
233
- "epoch": 3.21,
234
- "learning_rate": 6.000000000000001e-07,
235
- "loss": 0.0119,
236
- "step": 950
237
- },
238
- {
239
- "epoch": 3.23,
240
- "learning_rate": 3.2222222222222227e-07,
241
- "loss": 0.0173,
242
- "step": 975
243
- },
244
- {
245
- "epoch": 4.01,
246
- "learning_rate": 4.444444444444445e-08,
247
- "loss": 0.0241,
248
- "step": 1000
249
- },
250
- {
251
- "epoch": 4.01,
252
- "eval_loss": 0.5422283411026001,
253
- "eval_runtime": 909.8399,
254
- "eval_samples_per_second": 1.36,
255
- "eval_steps_per_second": 0.086,
256
- "eval_wer": 20.04825619653433,
257
- "step": 1000
258
- },
259
- {
260
- "epoch": 4.01,
261
- "step": 1000,
262
- "total_flos": 2.04120981504e+18,
263
- "train_loss": 0.19602714616060257,
264
- "train_runtime": 17744.1511,
265
- "train_samples_per_second": 0.902,
266
- "train_steps_per_second": 0.056
267
  }
268
  ],
269
- "max_steps": 1000,
270
  "num_train_epochs": 9223372036854775807,
271
- "total_flos": 2.04120981504e+18,
272
  "trial_name": null,
273
  "trial_params": null
274
  }
 
1
  {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 1,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 1.0,
12
+ "step": 1,
13
+ "total_flos": 2123171020800000.0,
14
+ "train_loss": 0.7568578720092773,
15
+ "train_runtime": 77.245,
16
+ "train_samples_per_second": 0.104,
17
+ "train_steps_per_second": 0.013
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  }
19
  ],
20
+ "max_steps": 1,
21
  "num_train_epochs": 9223372036854775807,
22
+ "total_flos": 2123171020800000.0,
23
  "trial_name": null,
24
  "trial_params": null
25
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:830f6ecfa0a963c5e4ff14505bfc4032398db3e6c643bafb761761372b95daa0
3
  size 3567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:797ed569144ee33ce5c73c8ff626254eade6c09a5f329d456d3905f401ac3e71
3
  size 3567