pkufool's picture
Update model.py
ecf7643
# Copyright 2022-2023 Xiaomi Corp. (authors: Fangjun Kuang)
#
# See LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from huggingface_hub import hf_hub_download
# Copy every k2 shared library (*.so) into sherpa's lib directory. This runs at
# import time, before `import sherpa` below.
# NOTE(review): presumably sherpa needs the k2 libraries next to its own when
# it is loaded -- confirm. The paths are hard-coded for the account "user" and
# Python 3.8, and the exit status returned by os.system() is ignored, so a
# failed copy goes unnoticed here.
os.system(
"cp -v /home/user/.local/lib/python3.8/site-packages/k2/lib/*.so /home/user/.local/lib/python3.8/site-packages/sherpa/lib/"
)
import sherpa # noqa
def _get_nn_model_filename(
    repo_id: str,
    filename: str,
    subfolder: str = "exp",
) -> str:
    """Fetch a neural-network model file with ``hf_hub_download``.

    Args:
      repo_id: Hugging Face repository to download from.
      filename: Name of the file to fetch.
      subfolder: Directory inside the repository that holds the file.

    Returns:
      The value returned by ``hf_hub_download`` for the requested file.
    """
    return hf_hub_download(repo_id=repo_id, filename=filename, subfolder=subfolder)
def _get_token_filename(
    repo_id: str,
    filename: str = "tokens.txt",
    subfolder: str = "data/lang_char",
) -> str:
    """Fetch a token table file with ``hf_hub_download``.

    Args:
      repo_id: Hugging Face repository to download from.
      filename: Name of the token file to fetch.
      subfolder: Directory inside the repository that holds the file.

    Returns:
      The value returned by ``hf_hub_download`` for the requested file.
    """
    return hf_hub_download(repo_id=repo_id, filename=filename, subfolder=subfolder)
def get_english_model_2022_12_19(repo_id: str):
    """Build a ``sherpa.OnlineRecognizer`` from separate encoder/decoder/joiner files.

    Downloads ``encoder_jit_trace.pt``, ``decoder_jit_trace.pt`` and
    ``joiner_jit_trace.pt`` (default ``exp`` subfolder) plus ``tokens.txt``
    from ``data/lang_bpe_500``, then creates a recognizer that runs without
    GPU, uses greedy search and a chunk size of 32.

    Args:
      repo_id: Hugging Face repository that hosts the model files.

    Returns:
      The constructed ``sherpa.OnlineRecognizer``.
    """
    # Downloads happen in the order encoder -> decoder -> joiner -> tokens.
    encoder, decoder, joiner = [
        _get_nn_model_filename(repo_id=repo_id, filename=model_file)
        for model_file in (
            "encoder_jit_trace.pt",
            "decoder_jit_trace.pt",
            "joiner_jit_trace.pt",
        )
    ]
    tokens = _get_token_filename(repo_id=repo_id, subfolder="data/lang_bpe_500")

    # Fbank features: samp_freq 16000, 80 mel bins, dither disabled (0).
    feat_config = sherpa.FeatureConfig()
    fbank_opts = feat_config.fbank_opts
    fbank_opts.frame_opts.samp_freq = 16000
    fbank_opts.mel_opts.num_bins = 80
    fbank_opts.frame_opts.dither = 0

    recognizer_config = sherpa.OnlineRecognizerConfig(
        nn_model="",  # left empty; the three model parts are passed separately
        encoder_model=encoder,
        decoder_model=decoder,
        joiner_model=joiner,
        tokens=tokens,
        use_gpu=False,
        feat_config=feat_config,
        decoding_method="greedy_search",
        chunk_size=32,
    )
    return sherpa.OnlineRecognizer(recognizer_config)
def get_chinese_english_mixed_model_conv_emformer_transducer_stateless2_zh(
    repo_id: str,
) -> sherpa.OnlineRecognizer:
    """Build a ``sherpa.OnlineRecognizer`` from a single model file.

    Downloads ``cpu_jit-epoch-11-avg-1.pt`` (default ``exp`` subfolder) and
    ``tokens.txt`` from ``data/lang_char_bpe``, then creates a recognizer that
    runs without GPU and uses greedy search.

    Args:
      repo_id: Hugging Face repository that hosts the model files.

    Returns:
      The constructed ``sherpa.OnlineRecognizer``.
    """
    model_path = _get_nn_model_filename(
        repo_id=repo_id,
        filename="cpu_jit-epoch-11-avg-1.pt",
    )
    tokens_path = _get_token_filename(
        repo_id=repo_id,
        subfolder="data/lang_char_bpe",
    )

    # Fbank features: samp_freq 16000, 80 mel bins, dither disabled (0).
    feat_config = sherpa.FeatureConfig()
    fbank_opts = feat_config.fbank_opts
    fbank_opts.frame_opts.samp_freq = 16000
    fbank_opts.mel_opts.num_bins = 80
    fbank_opts.frame_opts.dither = 0

    recognizer_config = sherpa.OnlineRecognizerConfig(
        nn_model=model_path,
        tokens=tokens_path,
        use_gpu=False,
        feat_config=feat_config,
        decoding_method="greedy_search",
    )
    return sherpa.OnlineRecognizer(recognizer_config)
def get_chinese_english_mixed_model_k2fsa_zipformer_chinese_english_mixed(
    repo_id: str,
) -> sherpa.OnlineRecognizer:
    """Build a ``sherpa.OnlineRecognizer`` from separate encoder/decoder/joiner files.

    Downloads ``encoder_jit_trace.pt``, ``decoder_jit_trace.pt`` and
    ``joiner_jit_trace.pt`` (default ``exp`` subfolder) plus ``tokens.txt``
    from ``data/lang_char_bpe``, then creates a recognizer that runs without
    GPU, uses greedy search and a chunk size of 32.

    Args:
      repo_id: Hugging Face repository that hosts the model files.

    Returns:
      The constructed ``sherpa.OnlineRecognizer``.
    """
    # Downloads happen in the order encoder -> decoder -> joiner -> tokens.
    encoder, decoder, joiner = [
        _get_nn_model_filename(repo_id=repo_id, filename=model_file)
        for model_file in (
            "encoder_jit_trace.pt",
            "decoder_jit_trace.pt",
            "joiner_jit_trace.pt",
        )
    ]
    tokens = _get_token_filename(repo_id=repo_id, subfolder="data/lang_char_bpe")

    # Fbank features: samp_freq 16000, 80 mel bins, dither disabled (0).
    feat_config = sherpa.FeatureConfig()
    fbank_opts = feat_config.fbank_opts
    fbank_opts.frame_opts.samp_freq = 16000
    fbank_opts.mel_opts.num_bins = 80
    fbank_opts.frame_opts.dither = 0

    recognizer_config = sherpa.OnlineRecognizerConfig(
        nn_model="",  # left empty; the three model parts are passed separately
        encoder_model=encoder,
        decoder_model=decoder,
        joiner_model=joiner,
        tokens=tokens,
        use_gpu=False,
        feat_config=feat_config,
        decoding_method="greedy_search",
        chunk_size=32,
    )
    return sherpa.OnlineRecognizer(recognizer_config)
def create_recognizer(repo_id: str) -> sherpa.OnlineRecognizer:
    """Create the recognizer registered for ``repo_id``.

    The English registry is consulted first, then the Chinese+English one;
    the matching factory is called with ``repo_id``.

    Args:
      repo_id: Key identifying the model in one of the module-level registries.

    Returns:
      The recognizer produced by the registered factory.

    Raises:
      ValueError: If ``repo_id`` is in neither registry.
    """
    for registry in (english_models, chinese_english_mixed_models):
        factory = registry.get(repo_id)
        if factory is not None:
            return factory(repo_id)
    raise ValueError(f"Unsupported repo_id: {repo_id}")
# Registries mapping a Hugging Face repo id to the factory that builds its
# recognizer. Each factory is called with the repo id as its only argument.
english_models = {
    # https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
    # "Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29": get_english_model_2022_12_19
    "desh2608/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-small": get_english_model_2022_12_19,
}

chinese_english_mixed_models = {
    # https://huggingface.co/pfluo/k2fsa-zipformer-chinese-english-mixed
    "pfluo/k2fsa-zipformer-chinese-english-mixed": get_chinese_english_mixed_model_k2fsa_zipformer_chinese_english_mixed,
    # https://huggingface.co/ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh
    "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh": get_chinese_english_mixed_model_conv_emformer_transducer_stateless2_zh,
}

# Union of both registries; on a duplicate key the Chinese+English entry wins.
all_models = {
    **english_models,
    **chinese_english_mixed_models,
}

# Language label -> list of repo ids available for that language.
language_to_models = {
    "English": list(english_models),
    "Chinese+English": list(chinese_english_mixed_models),
}