csukuangfj committed on
Commit
f542cea
1 Parent(s): 1d48bfe

small fixes

Browse files
Files changed (4) hide show
  1. app.py +7 -3
  2. examples.py +47 -27
  3. model.py +13 -4
  4. test_wavs/tal_csasr/0.wav +0 -0
app.py CHANGED
@@ -125,9 +125,6 @@ def process(
125
 
126
  filename = convert_to_wav(in_filename)
127
 
128
- logging.info(f"filename: {in_filename}")
129
- os.system(f"ffprobe {filename}")
130
-
131
  now = datetime.now()
132
  date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
133
  logging.info(f"Started at {date_time}")
@@ -319,6 +316,13 @@ with demo:
319
  )
320
  gr.Markdown(description)
321
 
 
 
 
 
 
 
 
322
  if __name__ == "__main__":
323
  formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
324
 
 
125
 
126
  filename = convert_to_wav(in_filename)
127
 
 
 
 
128
  now = datetime.now()
129
  date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
130
  logging.info(f"Started at {date_time}")
 
316
  )
317
  gr.Markdown(description)
318
 
319
+ torch.set_num_threads(1)
320
+ torch.set_num_interop_threads(1)
321
+
322
+ torch._C._jit_set_profiling_executor(False)
323
+ torch._C._jit_set_profiling_mode(False)
324
+ torch._C._set_graph_executor_optimize(False)
325
+
326
  if __name__ == "__main__":
327
  formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
328
 
examples.py CHANGED
@@ -16,6 +16,48 @@
16
  # See the License for the specific language governing permissions and
17
  # limitations under the License.
18
  examples = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # librispeech
20
  # https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13/tree/main/test_wavs
21
  [
@@ -154,57 +196,42 @@ examples = [
154
  "./test_wavs/aidatatang_200zh/T0055G0036S0004.wav",
155
  ],
156
  # tal_csasr
157
- # https://huggingface.co/luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5/tree/main/test_wavs
158
  [
159
  "Chinese+English",
160
- "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
161
  "greedy_search",
162
  4,
163
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_132.wav",
164
  ],
165
  [
166
  "Chinese+English",
167
- "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
168
  "greedy_search",
169
  4,
170
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_138.wav",
171
  ],
172
  [
173
  "Chinese+English",
174
- "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
175
  "greedy_search",
176
  4,
177
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_145.wav",
178
  ],
179
  [
180
  "Tibetan",
181
- "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
182
  "greedy_search",
183
  4,
184
  "./test_wavs/tibetan/a_0_cacm-A70_31116.wav",
185
  ],
186
  [
187
  "Tibetan",
188
- "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
189
- "greedy_search",
190
- 4,
191
- "./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
192
- ],
193
- [
194
- "Tibetan",
195
- "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
196
  "greedy_search",
197
  4,
198
  "./test_wavs/tibetan/a_0_cacm-A70_31118.wav",
199
  ],
200
  # arabic
201
- [
202
- "Arabic",
203
- "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
204
- "greedy_search",
205
- 4,
206
- "./test_wavs/arabic/a.wav",
207
- ],
208
  [
209
  "Arabic",
210
  "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
@@ -226,11 +253,4 @@ examples = [
226
  4,
227
  "./test_wavs/german/20120315-0900-PLENARY-14-de_20120315.wav",
228
  ],
229
- [
230
- "German",
231
- "csukuangfj/wav2vec2.0-torchaudio",
232
- "greedy_search",
233
- 4,
234
- "./test_wavs/german/20170517-0900-PLENARY-16-de_20170517.wav",
235
- ],
236
  ]
 
16
  # See the License for the specific language governing permissions and
17
  # limitations under the License.
18
  examples = [
19
+ [
20
+ "Chinese+English",
21
+ "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
22
+ "greedy_search",
23
+ 4,
24
+ "./test_wavs/tal_csasr/0.wav",
25
+ ],
26
+ [
27
+ "English",
28
+ "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13",
29
+ "greedy_search",
30
+ 4,
31
+ "./test_wavs/librispeech/1089-134686-0001.wav",
32
+ ],
33
+ [
34
+ "Chinese",
35
+ "luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2",
36
+ "greedy_search",
37
+ 4,
38
+ "./test_wavs/wenetspeech/DEV_T0000000000.opus",
39
+ ],
40
+ [
41
+ "German",
42
+ "csukuangfj/wav2vec2.0-torchaudio",
43
+ "greedy_search",
44
+ 4,
45
+ "./test_wavs/german/20170517-0900-PLENARY-16-de_20170517.wav",
46
+ ],
47
+ [
48
+ "Arabic",
49
+ "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
50
+ "greedy_search",
51
+ 4,
52
+ "./test_wavs/arabic/a.wav",
53
+ ],
54
+ [
55
+ "Tibetan",
56
+ "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
57
+ "greedy_search",
58
+ 4,
59
+ "./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
60
+ ],
61
  # librispeech
62
  # https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13/tree/main/test_wavs
63
  [
 
196
  "./test_wavs/aidatatang_200zh/T0055G0036S0004.wav",
197
  ],
198
  # tal_csasr
 
199
  [
200
  "Chinese+English",
201
+ "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
202
  "greedy_search",
203
  4,
204
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_132.wav",
205
  ],
206
  [
207
  "Chinese+English",
208
+ "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
209
  "greedy_search",
210
  4,
211
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_138.wav",
212
  ],
213
  [
214
  "Chinese+English",
215
+ "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
216
  "greedy_search",
217
  4,
218
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_145.wav",
219
  ],
220
  [
221
  "Tibetan",
222
+ "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
223
  "greedy_search",
224
  4,
225
  "./test_wavs/tibetan/a_0_cacm-A70_31116.wav",
226
  ],
227
  [
228
  "Tibetan",
229
+ "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
 
 
 
 
 
 
 
230
  "greedy_search",
231
  4,
232
  "./test_wavs/tibetan/a_0_cacm-A70_31118.wav",
233
  ],
234
  # arabic
 
 
 
 
 
 
 
235
  [
236
  "Arabic",
237
  "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
 
253
  4,
254
  "./test_wavs/german/20120315-0900-PLENARY-14-de_20120315.wav",
255
  ],
 
 
 
 
 
 
 
256
  ]
model.py CHANGED
@@ -262,20 +262,28 @@ def _get_wenetspeech_pre_trained_model(
262
 
263
 
264
  @lru_cache(maxsize=10)
265
- def _get_tal_csasr_pre_trained_model(
266
  repo_id: str,
267
  decoding_method: str,
268
  num_active_paths: int,
269
  ):
270
  assert repo_id in [
271
  "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
 
272
  ], repo_id
273
 
 
 
 
 
 
 
 
274
  nn_model = _get_nn_model_filename(
275
  repo_id=repo_id,
276
- filename="cpu_jit.pt",
277
  )
278
- tokens = _get_token_filename(repo_id=repo_id)
279
 
280
  feat_config = sherpa.FeatureConfig()
281
  feat_config.fbank_opts.frame_opts.samp_freq = sample_rate
@@ -541,7 +549,8 @@ english_models = {
541
  }
542
 
543
  chinese_english_mixed_models = {
544
- "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5": _get_tal_csasr_pre_trained_model, # noqa
 
545
  }
546
 
547
  tibetan_models = {
 
262
 
263
 
264
  @lru_cache(maxsize=10)
265
+ def _get_chinese_english_mixed_model(
266
  repo_id: str,
267
  decoding_method: str,
268
  num_active_paths: int,
269
  ):
270
  assert repo_id in [
271
  "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
272
+ "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
273
  ], repo_id
274
 
275
+ if repo_id == "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5":
276
+ filename = "cpu_jit.pt"
277
+ subfolder = "data/lang_char"
278
+ elif repo_id == "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh":
279
+ filename = "cpu_jit-epoch-11-avg-1.pt"
280
+ subfolder = "data/lang_char_bpe"
281
+
282
  nn_model = _get_nn_model_filename(
283
  repo_id=repo_id,
284
+ filename=filename,
285
  )
286
+ tokens = _get_token_filename(repo_id=repo_id, subfolder=subfolder)
287
 
288
  feat_config = sherpa.FeatureConfig()
289
  feat_config.fbank_opts.frame_opts.samp_freq = sample_rate
 
549
  }
550
 
551
  chinese_english_mixed_models = {
552
+ "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh": _get_chinese_english_mixed_model,
553
+ "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5": _get_chinese_english_mixed_model, # noqa
554
  }
555
 
556
  tibetan_models = {
test_wavs/tal_csasr/0.wav ADDED
Binary file (778 kB). View file