add central model
Browse files- Dockerfile +1 -0
- infer_onnx.py +9 -7
- spk_to_id_2.json +1 -1
Dockerfile
CHANGED
@@ -44,6 +44,7 @@ RUN pip install -r requirements.txt
|
|
44 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_bal_opset_15_10_steps.onnx --local-dir $HOME/app/
|
45 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_occ_opset_15_10_steps.onnx --local-dir $HOME/app/
|
46 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_val_opset_15_10_steps.onnx --local-dir $HOME/app/
|
|
|
47 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent config.yaml --local-dir $HOME/app/
|
48 |
|
49 |
RUN huggingface-cli download BSC-LT/vocos-mel-22khz-cat mel_spec_22khz_cat.onnx --local-dir $HOME/app/
|
|
|
44 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_bal_opset_15_10_steps.onnx --local-dir $HOME/app/
|
45 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_occ_opset_15_10_steps.onnx --local-dir $HOME/app/
|
46 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_val_opset_15_10_steps.onnx --local-dir $HOME/app/
|
47 |
+
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_cen_opset_15_10_steps.onnx --local-dir $HOME/app/
|
48 |
RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent config.yaml --local-dir $HOME/app/
|
49 |
|
50 |
RUN huggingface-cli download BSC-LT/vocos-mel-22khz-cat mel_spec_22khz_cat.onnx --local-dir $HOME/app/
|
infer_onnx.py
CHANGED
@@ -36,7 +36,7 @@ def process_text(i: int, text: str, device: torch.device, cleaner:str):
|
|
36 |
|
37 |
# paths
|
38 |
MODEL_PATH_MATCHA_MEL_BAL="matcha_multispeaker_cat_bal_opset_15_10_steps.onnx"
|
39 |
-
|
40 |
MODEL_PATH_MATCHA_MEL_OCC="matcha_multispeaker_cat_occ_opset_15_10_steps.onnx"
|
41 |
MODEL_PATH_MATCHA_MEL_VAL="matcha_multispeaker_cat_val_opset_15_10_steps.onnx"
|
42 |
MODEL_PATH_VOCOS="mel_spec_22khz_cat.onnx"
|
@@ -46,7 +46,7 @@ SPEAKER_ID_DICT="spk_to_id_2.json"
|
|
46 |
# Load models
|
47 |
sess_options = onnxruntime.SessionOptions()
|
48 |
model_matcha_mel_bal = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_BAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
49 |
-
|
50 |
model_matcha_mel_occ = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_OCC), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
51 |
model_matcha_mel_val = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_VAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
52 |
model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
@@ -55,12 +55,14 @@ speaker_id_dict = json.load(open(SPEAKER_ID_DICT))
|
|
55 |
accents = [e for e in speaker_id_dict.keys()]
|
56 |
|
57 |
models={"balear":model_matcha_mel_bal,
|
58 |
-
"occidental": model_matcha_mel_occ,
|
59 |
-
"valencia": model_matcha_mel_val
|
|
|
60 |
|
61 |
cleaners={"balear": "catalan_balear_cleaners",
|
62 |
-
"occidental": "catalan_occidental_cleaners",
|
63 |
-
"valencia": "catalan_valencia_cleaners"
|
|
|
64 |
|
65 |
|
66 |
speakers = [sp for sp in speaker_id_dict[DEFAULT_ACCENT].keys()]
|
@@ -197,7 +199,7 @@ description = """
|
|
197 |
|
198 |
For vocoders we use [Vocos](https://huggingface.co/BSC-LT/vocos-mel-22khz-cat) trained in a catalan set of ~28 hours.
|
199 |
|
200 |
-
[Matcha](https://huggingface.co/BSC-LT/matcha-tts-cat-
|
201 |
|
202 |
"""
|
203 |
|
|
|
36 |
|
37 |
# paths
|
38 |
MODEL_PATH_MATCHA_MEL_BAL="matcha_multispeaker_cat_bal_opset_15_10_steps.onnx"
|
39 |
+
MODEL_PATH_MATCHA_MEL_CAT="matcha_multispeaker_cat_cen_opset_15_10_steps.onnx"
|
40 |
MODEL_PATH_MATCHA_MEL_OCC="matcha_multispeaker_cat_occ_opset_15_10_steps.onnx"
|
41 |
MODEL_PATH_MATCHA_MEL_VAL="matcha_multispeaker_cat_val_opset_15_10_steps.onnx"
|
42 |
MODEL_PATH_VOCOS="mel_spec_22khz_cat.onnx"
|
|
|
46 |
# Load models
|
47 |
sess_options = onnxruntime.SessionOptions()
|
48 |
model_matcha_mel_bal = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_BAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
49 |
+
model_matcha_mel_cat = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_CAT), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
50 |
model_matcha_mel_occ = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_OCC), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
51 |
model_matcha_mel_val = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_VAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
52 |
model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
|
|
|
55 |
accents = [e for e in speaker_id_dict.keys()]
|
56 |
|
57 |
models={"balear":model_matcha_mel_bal,
|
58 |
+
"nord-occidental": model_matcha_mel_occ,
|
59 |
+
"valencia": model_matcha_mel_val,
|
60 |
+
"central": model_matcha_mel_cat}
|
61 |
|
62 |
cleaners={"balear": "catalan_balear_cleaners",
|
63 |
+
"nord-occidental": "catalan_occidental_cleaners",
|
64 |
+
"valencia": "catalan_valencia_cleaners",
|
65 |
+
"central": "catalan_cleaners"}
|
66 |
|
67 |
|
68 |
speakers = [sp for sp in speaker_id_dict[DEFAULT_ACCENT].keys()]
|
|
|
199 |
|
200 |
For vocoders we use [Vocos](https://huggingface.co/BSC-LT/vocos-mel-22khz-cat) trained in a catalan set of ~28 hours.
|
201 |
|
202 |
+
[Matcha](https://huggingface.co/BSC-LT/matcha-tts-cat-multispeaker) was trained using openslr69 and festcat datasets
|
203 |
|
204 |
"""
|
205 |
|
spk_to_id_2.json
CHANGED
@@ -7,7 +7,7 @@
|
|
7 |
"grau": 0,
|
8 |
"elia": 1
|
9 |
},
|
10 |
-
"occidental":{
|
11 |
"pere": 0,
|
12 |
"emma": 1
|
13 |
},
|
|
|
7 |
"grau": 0,
|
8 |
"elia": 1
|
9 |
},
|
10 |
+
"nord-occidental":{
|
11 |
"pere": 0,
|
12 |
"emma": 1
|
13 |
},
|