csukuangfj
committed on
Commit
•
e6d227e
1
Parent(s):
0ae65b0
add a French model
Browse files
examples.py
CHANGED
@@ -65,6 +65,13 @@ examples = [
|
|
65 |
4,
|
66 |
"./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
|
67 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
[
|
69 |
"Chinese",
|
70 |
"desh2608/icefall-asr-alimeeting-pruned-transducer-stateless7",
|
@@ -316,4 +323,18 @@ examples = [
|
|
316 |
4,
|
317 |
"./test_wavs/german/20120315-0900-PLENARY-14-de_20120315.wav",
|
318 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
319 |
]
|
|
|
65 |
4,
|
66 |
"./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
|
67 |
],
|
68 |
+
[
|
69 |
+
"French",
|
70 |
+
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
|
71 |
+
"greedy_search",
|
72 |
+
4,
|
73 |
+
"./test_wavs/french/common_voice_fr_19364697.wav",
|
74 |
+
],
|
75 |
[
|
76 |
"Chinese",
|
77 |
"desh2608/icefall-asr-alimeeting-pruned-transducer-stateless7",
|
|
|
323 |
4,
|
324 |
"./test_wavs/german/20120315-0900-PLENARY-14-de_20120315.wav",
|
325 |
],
|
326 |
+
[
|
327 |
+
"French",
|
328 |
+
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
|
329 |
+
"greedy_search",
|
330 |
+
4,
|
331 |
+
"./test_wavs/french/common_voice_fr_19738183.wav",
|
332 |
+
],
|
333 |
+
[
|
334 |
+
"French",
|
335 |
+
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
|
336 |
+
"greedy_search",
|
337 |
+
4,
|
338 |
+
"./test_wavs/french/common_voice_fr_27024649.wav",
|
339 |
+
],
|
340 |
]
|
model.py
CHANGED
@@ -111,8 +111,31 @@ def decode_offline_recognizer_sherpa_onnx(
|
|
111 |
return s.result.text.lower()
|
112 |
|
113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
def decode(
|
115 |
-
recognizer: Union[
|
|
|
|
|
|
|
|
|
|
|
116 |
filename: str,
|
117 |
) -> str:
|
118 |
if isinstance(recognizer, sherpa.OfflineRecognizer):
|
@@ -121,6 +144,8 @@ def decode(
|
|
121 |
return decode_online_recognizer(recognizer, filename)
|
122 |
elif isinstance(recognizer, sherpa_onnx.OfflineRecognizer):
|
123 |
return decode_offline_recognizer_sherpa_onnx(recognizer, filename)
|
|
|
|
|
124 |
else:
|
125 |
raise ValueError(f"Unknown recognizer type {type(recognizer)}")
|
126 |
|
@@ -155,6 +180,10 @@ def get_pretrained_model(
|
|
155 |
return german_models[repo_id](
|
156 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
157 |
)
|
|
|
|
|
|
|
|
|
158 |
elif repo_id in japanese_models:
|
159 |
return japanese_models[repo_id](
|
160 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
@@ -654,6 +683,51 @@ def _get_german_pre_trained_model(
|
|
654 |
return recognizer
|
655 |
|
656 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
657 |
@lru_cache(maxsize=10)
|
658 |
def _get_japanese_pre_trained_model(
|
659 |
repo_id: str,
|
@@ -778,6 +852,10 @@ german_models = {
|
|
778 |
"csukuangfj/wav2vec2.0-torchaudio": _get_german_pre_trained_model,
|
779 |
}
|
780 |
|
|
|
|
|
|
|
|
|
781 |
japanese_models = {
|
782 |
"TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-fluent": _get_japanese_pre_trained_model,
|
783 |
"TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-disfluent": _get_japanese_pre_trained_model,
|
@@ -791,6 +869,7 @@ all_models = {
|
|
791 |
**tibetan_models,
|
792 |
**arabic_models,
|
793 |
**german_models,
|
|
|
794 |
}
|
795 |
|
796 |
language_to_models = {
|
@@ -801,4 +880,5 @@ language_to_models = {
|
|
801 |
"Tibetan": list(tibetan_models.keys()),
|
802 |
"Arabic": list(arabic_models.keys()),
|
803 |
"German": list(german_models.keys()),
|
|
|
804 |
}
|
|
|
111 |
return s.result.text.lower()
|
112 |
|
113 |
|
114 |
+
def decode_online_recognizer_sherpa_onnx(
|
115 |
+
recognizer: sherpa_onnx.OnlineRecognizer,
|
116 |
+
filename: str,
|
117 |
+
) -> str:
|
118 |
+
s = recognizer.create_stream()
|
119 |
+
samples, sample_rate = read_wave(filename)
|
120 |
+
s.accept_waveform(sample_rate, samples)
|
121 |
+
|
122 |
+
tail_paddings = np.zeros(int(0.3 * sample_rate), dtype=np.float32)
|
123 |
+
s.accept_waveform(sample_rate, tail_paddings)
|
124 |
+
s.input_finished()
|
125 |
+
|
126 |
+
while recognizer.is_ready(s):
|
127 |
+
recognizer.decode_stream(s)
|
128 |
+
|
129 |
+
return recognizer.get_result(s).lower()
|
130 |
+
|
131 |
+
|
132 |
def decode(
|
133 |
+
recognizer: Union[
|
134 |
+
sherpa.OfflineRecognizer,
|
135 |
+
sherpa.OnlineRecognizer,
|
136 |
+
sherpa_onnx.OfflineRecognizer,
|
137 |
+
sherpa_onnx.OnlineRecognizer,
|
138 |
+
],
|
139 |
filename: str,
|
140 |
) -> str:
|
141 |
if isinstance(recognizer, sherpa.OfflineRecognizer):
|
|
|
144 |
return decode_online_recognizer(recognizer, filename)
|
145 |
elif isinstance(recognizer, sherpa_onnx.OfflineRecognizer):
|
146 |
return decode_offline_recognizer_sherpa_onnx(recognizer, filename)
|
147 |
+
elif isinstance(recognizer, sherpa_onnx.OnlineRecognizer):
|
148 |
+
return decode_online_recognizer_sherpa_onnx(recognizer, filename)
|
149 |
else:
|
150 |
raise ValueError(f"Unknown recognizer type {type(recognizer)}")
|
151 |
|
|
|
180 |
return german_models[repo_id](
|
181 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
182 |
)
|
183 |
+
elif repo_id in french_models:
|
184 |
+
return french_models[repo_id](
|
185 |
+
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
186 |
+
)
|
187 |
elif repo_id in japanese_models:
|
188 |
return japanese_models[repo_id](
|
189 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
|
|
683 |
return recognizer
|
684 |
|
685 |
|
686 |
+
@lru_cache(maxsize=10)
|
687 |
+
def _get_french_pre_trained_model(
|
688 |
+
repo_id: str,
|
689 |
+
decoding_method: str,
|
690 |
+
num_active_paths: int,
|
691 |
+
):
|
692 |
+
assert repo_id in [
|
693 |
+
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
|
694 |
+
], repo_id
|
695 |
+
|
696 |
+
encoder_model = _get_nn_model_filename(
|
697 |
+
repo_id=repo_id,
|
698 |
+
filename="encoder-epoch-29-avg-9-with-averaged-model.onnx",
|
699 |
+
subfolder=".",
|
700 |
+
)
|
701 |
+
|
702 |
+
decoder_model = _get_nn_model_filename(
|
703 |
+
repo_id=repo_id,
|
704 |
+
filename="decoder-epoch-29-avg-9-with-averaged-model.onnx",
|
705 |
+
subfolder=".",
|
706 |
+
)
|
707 |
+
|
708 |
+
joiner_model = _get_nn_model_filename(
|
709 |
+
repo_id=repo_id,
|
710 |
+
filename="joiner-epoch-29-avg-9-with-averaged-model.onnx",
|
711 |
+
subfolder=".",
|
712 |
+
)
|
713 |
+
|
714 |
+
tokens = _get_token_filename(repo_id=repo_id, subfolder=".")
|
715 |
+
|
716 |
+
recognizer = sherpa_onnx.OnlineRecognizer(
|
717 |
+
tokens=tokens,
|
718 |
+
encoder=encoder_model,
|
719 |
+
decoder=decoder_model,
|
720 |
+
joiner=joiner_model,
|
721 |
+
num_threads=1,
|
722 |
+
sample_rate=16000,
|
723 |
+
feature_dim=80,
|
724 |
+
decoding_method=decoding_method,
|
725 |
+
max_active_paths=num_active_paths,
|
726 |
+
)
|
727 |
+
|
728 |
+
return recognizer
|
729 |
+
|
730 |
+
|
731 |
@lru_cache(maxsize=10)
|
732 |
def _get_japanese_pre_trained_model(
|
733 |
repo_id: str,
|
|
|
852 |
"csukuangfj/wav2vec2.0-torchaudio": _get_german_pre_trained_model,
|
853 |
}
|
854 |
|
855 |
+
french_models = {
|
856 |
+
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14": _get_french_pre_trained_model,
|
857 |
+
}
|
858 |
+
|
859 |
japanese_models = {
|
860 |
"TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-fluent": _get_japanese_pre_trained_model,
|
861 |
"TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-disfluent": _get_japanese_pre_trained_model,
|
|
|
869 |
**tibetan_models,
|
870 |
**arabic_models,
|
871 |
**german_models,
|
872 |
+
**french_models,
|
873 |
}
|
874 |
|
875 |
language_to_models = {
|
|
|
880 |
"Tibetan": list(tibetan_models.keys()),
|
881 |
"Arabic": list(arabic_models.keys()),
|
882 |
"German": list(german_models.keys()),
|
883 |
+
"French": list(french_models.keys()),
|
884 |
}
|
test_wavs/french/common_voice_fr_19364697.wav
ADDED
Binary file (228 kB). View file
|
|
test_wavs/french/common_voice_fr_19738183.wav
ADDED
Binary file (122 kB). View file
|
|
test_wavs/french/common_voice_fr_27024649.wav
ADDED
Binary file (203 kB). View file
|
|
test_wavs/french/trans.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
common_voice_fr_19738183 CE DERNIER A ÉVOLUÉ TOUT AU LONG DE L'HISTOIRE ROMAINE
|
2 |
+
common_voice_fr_27024649 SON ACTIONNAIRE MAJORITAIRE EST LE CONSEIL TERRITORIAL DE SAINT PIERRE ET MIQUELON
|
3 |
+
common_voice_fr_19364697 CE SITE CONTIENT QUATRE TOMBEAUX DE LA DYNASTIE ACHÉMÉNIDE ET SEPT DES SASSANIDES
|