csukuangfj committed on
Commit
97ff6ba
1 Parent(s): 82c3ef1

add wenet models

Browse files
Files changed (1) hide show
  1. model.py +42 -0
model.py CHANGED
@@ -327,6 +327,46 @@ def _get_alimeeting_pre_trained_model(
327
  return recognizer
328
 
329
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  @lru_cache(maxsize=10)
331
  def _get_aidatatang_200zh_pretrained_mode(
332
  repo_id: str,
@@ -448,6 +488,7 @@ chinese_models = {
448
  "yuekai/icefall-asr-aishell2-pruned-transducer-stateless5-B-2022-07-12": _get_aishell2_pretrained_model, # noqa
449
  "luomingshuang/icefall_asr_aidatatang-200zh_pruned_transducer_stateless2": _get_aidatatang_200zh_pretrained_mode, # noqa
450
  "luomingshuang/icefall_asr_alimeeting_pruned_transducer_stateless2": _get_alimeeting_pre_trained_model, # noqa
 
451
  }
452
 
453
  english_models = {
@@ -456,6 +497,7 @@ english_models = {
456
  "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14": _get_librispeech_pre_trained_model, # noqa
457
  "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11": _get_librispeech_pre_trained_model, # noqa
458
  "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13": _get_librispeech_pre_trained_model, # noqa
 
459
  }
460
 
461
  chinese_english_mixed_models = {
 
327
  return recognizer
328
 
329
 
330
@lru_cache(maxsize=10)
def _get_wenet_model(
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
):
    """Create a ``sherpa.OfflineRecognizer`` for a supported WeNet model.

    The result is memoized by ``lru_cache`` (up to 10 distinct argument
    combinations), so repeated calls with the same arguments return the
    same recognizer object.

    Args:
      repo_id:
        Identifier of the model repository. Must be one of the WeNet
        repositories listed in ``supported_repos`` below.
      decoding_method:
        Forwarded unchanged to ``sherpa.OfflineRecognizerConfig``.
      num_active_paths:
        Forwarded unchanged to ``sherpa.OfflineRecognizerConfig``.

    Returns:
      The ``sherpa.OfflineRecognizer`` built from the resolved model file,
      token file and feature configuration.

    Raises:
      AssertionError: If ``repo_id`` is not a supported WeNet repository.
    """
    # Every repo id that the model tables map to this function must be
    # listed here; otherwise selecting that model fails on the assert.
    supported_repos = [
        "csukuangfj/wenet-chinese-model",
        "csukuangfj/wenet-english-model",
    ]
    assert repo_id in supported_repos, repo_id

    # NOTE(review): both repos are assumed to ship "final.zip" and
    # "units.txt" at the repository root -- confirm for the English model.
    nn_model = _get_nn_model_filename(
        repo_id=repo_id,
        filename="final.zip",
        subfolder="./",
    )
    tokens = _get_token_filename(
        repo_id=repo_id,
        filename="units.txt",
        subfolder="./",
    )

    # Samples are passed through without normalization.
    feat_config = sherpa.FeatureConfig(normalize_samples=False)
    # NOTE(review): `sample_rate` is not defined in this function; it is
    # presumably a module-level constant -- confirm it exists in the file.
    feat_config.fbank_opts.frame_opts.samp_freq = sample_rate
    feat_config.fbank_opts.mel_opts.num_bins = 80
    feat_config.fbank_opts.frame_opts.dither = 0

    config = sherpa.OfflineRecognizerConfig(
        nn_model=nn_model,
        tokens=tokens,
        use_gpu=False,
        feat_config=feat_config,
        decoding_method=decoding_method,
        num_active_paths=num_active_paths,
    )

    recognizer = sherpa.OfflineRecognizer(config)

    return recognizer
368
+
369
+
370
  @lru_cache(maxsize=10)
371
  def _get_aidatatang_200zh_pretrained_mode(
372
  repo_id: str,
 
488
  "yuekai/icefall-asr-aishell2-pruned-transducer-stateless5-B-2022-07-12": _get_aishell2_pretrained_model, # noqa
489
  "luomingshuang/icefall_asr_aidatatang-200zh_pruned_transducer_stateless2": _get_aidatatang_200zh_pretrained_mode, # noqa
490
  "luomingshuang/icefall_asr_alimeeting_pruned_transducer_stateless2": _get_alimeeting_pre_trained_model, # noqa
491
+ "csukuangfj/wenet-chinese-model": _get_wenet_model,
492
  }
493
 
494
  english_models = {
 
497
  "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14": _get_librispeech_pre_trained_model, # noqa
498
  "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11": _get_librispeech_pre_trained_model, # noqa
499
  "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13": _get_librispeech_pre_trained_model, # noqa
500
+ "csukuangfj/wenet-english-model": _get_wenet_model,
501
  }
502
 
503
  chinese_english_mixed_models = {