csukuangfj committed on
Commit
e6d227e
1 Parent(s): 0ae65b0

add a french model

Browse files
examples.py CHANGED
@@ -65,6 +65,13 @@ examples = [
65
  4,
66
  "./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
67
  ],
 
 
 
 
 
 
 
68
  [
69
  "Chinese",
70
  "desh2608/icefall-asr-alimeeting-pruned-transducer-stateless7",
@@ -316,4 +323,18 @@ examples = [
316
  4,
317
  "./test_wavs/german/20120315-0900-PLENARY-14-de_20120315.wav",
318
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  ]
 
65
  4,
66
  "./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
67
  ],
68
+ [
69
+ "French",
70
+ "shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
71
+ "greedy_search",
72
+ 4,
73
+ "./test_wavs/french/common_voice_fr_19364697.wav",
74
+ ],
75
  [
76
  "Chinese",
77
  "desh2608/icefall-asr-alimeeting-pruned-transducer-stateless7",
 
323
  4,
324
  "./test_wavs/german/20120315-0900-PLENARY-14-de_20120315.wav",
325
  ],
326
+ [
327
+ "French",
328
+ "shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
329
+ "greedy_search",
330
+ 4,
331
+ "./test_wavs/french/common_voice_fr_19738183.wav",
332
+ ],
333
+ [
334
+ "French",
335
+ "shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
336
+ "greedy_search",
337
+ 4,
338
+ "./test_wavs/french/common_voice_fr_27024649.wav",
339
+ ],
340
  ]
model.py CHANGED
@@ -111,8 +111,31 @@ def decode_offline_recognizer_sherpa_onnx(
111
  return s.result.text.lower()
112
 
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  def decode(
115
- recognizer: Union[sherpa.OfflineRecognizer, sherpa.OnlineRecognizer],
 
 
 
 
 
116
  filename: str,
117
  ) -> str:
118
  if isinstance(recognizer, sherpa.OfflineRecognizer):
@@ -121,6 +144,8 @@ def decode(
121
  return decode_online_recognizer(recognizer, filename)
122
  elif isinstance(recognizer, sherpa_onnx.OfflineRecognizer):
123
  return decode_offline_recognizer_sherpa_onnx(recognizer, filename)
 
 
124
  else:
125
  raise ValueError(f"Unknown recognizer type {type(recognizer)}")
126
 
@@ -155,6 +180,10 @@ def get_pretrained_model(
155
  return german_models[repo_id](
156
  repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
157
  )
 
 
 
 
158
  elif repo_id in japanese_models:
159
  return japanese_models[repo_id](
160
  repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
@@ -654,6 +683,51 @@ def _get_german_pre_trained_model(
654
  return recognizer
655
 
656
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
657
  @lru_cache(maxsize=10)
658
  def _get_japanese_pre_trained_model(
659
  repo_id: str,
@@ -778,6 +852,10 @@ german_models = {
778
  "csukuangfj/wav2vec2.0-torchaudio": _get_german_pre_trained_model,
779
  }
780
 
 
 
 
 
781
  japanese_models = {
782
  "TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-fluent": _get_japanese_pre_trained_model,
783
  "TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-disfluent": _get_japanese_pre_trained_model,
@@ -791,6 +869,7 @@ all_models = {
791
  **tibetan_models,
792
  **arabic_models,
793
  **german_models,
 
794
  }
795
 
796
  language_to_models = {
@@ -801,4 +880,5 @@ language_to_models = {
801
  "Tibetan": list(tibetan_models.keys()),
802
  "Arabic": list(arabic_models.keys()),
803
  "German": list(german_models.keys()),
 
804
  }
 
111
  return s.result.text.lower()
112
 
113
 
114
def decode_online_recognizer_sherpa_onnx(
    recognizer: sherpa_onnx.OnlineRecognizer,
    filename: str,
) -> str:
    """Decode a single wave file with a streaming sherpa-onnx recognizer.

    The complete waveform is pushed into a freshly created stream, followed
    by 0.3 seconds of zero-valued samples. The stream is then marked as
    finished and decoded until the recognizer reports that it is no longer
    ready.

    Args:
      recognizer:
        The streaming recognizer used to create and decode the stream.
      filename:
        Path to the wave file; it is loaded with ``read_wave``.

    Returns:
      The recognition result, converted to lowercase.
    """
    stream = recognizer.create_stream()

    audio, rate = read_wave(filename)
    stream.accept_waveform(rate, audio)

    # Append 0.3 seconds of silence after the real audio before closing
    # the input.
    num_padding_samples = int(0.3 * rate)
    silence = np.zeros(num_padding_samples, dtype=np.float32)
    stream.accept_waveform(rate, silence)
    stream.input_finished()

    # Keep decoding for as long as the recognizer says the stream is ready.
    while recognizer.is_ready(stream):
        recognizer.decode_stream(stream)

    text = recognizer.get_result(stream)
    return text.lower()
130
+
131
+
132
  def decode(
133
+ recognizer: Union[
134
+ sherpa.OfflineRecognizer,
135
+ sherpa.OnlineRecognizer,
136
+ sherpa_onnx.OfflineRecognizer,
137
+ sherpa_onnx.OnlineRecognizer,
138
+ ],
139
  filename: str,
140
  ) -> str:
141
  if isinstance(recognizer, sherpa.OfflineRecognizer):
 
144
  return decode_online_recognizer(recognizer, filename)
145
  elif isinstance(recognizer, sherpa_onnx.OfflineRecognizer):
146
  return decode_offline_recognizer_sherpa_onnx(recognizer, filename)
147
+ elif isinstance(recognizer, sherpa_onnx.OnlineRecognizer):
148
+ return decode_online_recognizer_sherpa_onnx(recognizer, filename)
149
  else:
150
  raise ValueError(f"Unknown recognizer type {type(recognizer)}")
151
 
 
180
  return german_models[repo_id](
181
  repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
182
  )
183
+ elif repo_id in french_models:
184
+ return french_models[repo_id](
185
+ repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
186
+ )
187
  elif repo_id in japanese_models:
188
  return japanese_models[repo_id](
189
  repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
 
683
  return recognizer
684
 
685
 
686
@lru_cache(maxsize=10)
def _get_french_pre_trained_model(
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
):
    """Build a streaming sherpa-onnx recognizer for the French model.

    Results are memoized by ``lru_cache``, so calling this again with the
    same arguments returns the same recognizer object.

    Args:
      repo_id:
        Repository of the model. Only
        ``shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14`` is
        accepted; any other value fails the assertion.
      decoding_method:
        Forwarded to the recognizer as ``decoding_method``.
      num_active_paths:
        Forwarded to the recognizer as ``max_active_paths``.

    Returns:
      A ``sherpa_onnx.OnlineRecognizer`` configured with the model files
      of ``repo_id``.
    """
    supported_repos = [
        "shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
    ]
    assert repo_id in supported_repos, repo_id

    # Keyword name of each network in the recognizer constructor, mapped to
    # its file name inside the repository. Dict order keeps the lookups in
    # encoder, decoder, joiner order.
    onnx_filenames = {
        "encoder": "encoder-epoch-29-avg-9-with-averaged-model.onnx",
        "decoder": "decoder-epoch-29-avg-9-with-averaged-model.onnx",
        "joiner": "joiner-epoch-29-avg-9-with-averaged-model.onnx",
    }
    model_paths = {
        part: _get_nn_model_filename(
            repo_id=repo_id,
            filename=onnx_name,
            subfolder=".",
        )
        for part, onnx_name in onnx_filenames.items()
    }

    tokens_path = _get_token_filename(repo_id=repo_id, subfolder=".")

    return sherpa_onnx.OnlineRecognizer(
        tokens=tokens_path,
        encoder=model_paths["encoder"],
        decoder=model_paths["decoder"],
        joiner=model_paths["joiner"],
        num_threads=1,
        sample_rate=16000,
        feature_dim=80,
        decoding_method=decoding_method,
        max_active_paths=num_active_paths,
    )
729
+
730
+
731
  @lru_cache(maxsize=10)
732
  def _get_japanese_pre_trained_model(
733
  repo_id: str,
 
852
  "csukuangfj/wav2vec2.0-torchaudio": _get_german_pre_trained_model,
853
  }
854
 
855
+ french_models = {
856
+ "shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14": _get_french_pre_trained_model,
857
+ }
858
+
859
  japanese_models = {
860
  "TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-fluent": _get_japanese_pre_trained_model,
861
  "TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-disfluent": _get_japanese_pre_trained_model,
 
869
  **tibetan_models,
870
  **arabic_models,
871
  **german_models,
872
+ **french_models,
873
  }
874
 
875
  language_to_models = {
 
880
  "Tibetan": list(tibetan_models.keys()),
881
  "Arabic": list(arabic_models.keys()),
882
  "German": list(german_models.keys()),
883
+ "French": list(french_models.keys()),
884
  }
test_wavs/french/common_voice_fr_19364697.wav ADDED
Binary file (228 kB). View file
 
test_wavs/french/common_voice_fr_19738183.wav ADDED
Binary file (122 kB). View file
 
test_wavs/french/common_voice_fr_27024649.wav ADDED
Binary file (203 kB). View file
 
test_wavs/french/trans.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ common_voice_fr_19738183 CE DERNIER A ÉVOLUÉ TOUT AU LONG DE L'HISTOIRE ROMAINE
2
+ common_voice_fr_27024649 SON ACTIONNAIRE MAJORITAIRE EST LE CONSEIL TERRITORIAL DE SAINT PIERRE ET MIQUELON
3
+ common_voice_fr_19364697 CE SITE CONTIENT QUATRE TOMBEAUX DE LA DYNASTIE ACHÉMÉNIDE ET SEPT DES SASSANIDES