wetdog committed on
Commit
35c9c3f
1 Parent(s): 2dd2041

add central model

Browse files
Files changed (3) hide show
  1. Dockerfile +1 -0
  2. infer_onnx.py +9 -7
  3. spk_to_id_2.json +1 -1
Dockerfile CHANGED
@@ -44,6 +44,7 @@ RUN pip install -r requirements.txt
44
  RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_bal_opset_15_10_steps.onnx --local-dir $HOME/app/
45
  RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_occ_opset_15_10_steps.onnx --local-dir $HOME/app/
46
  RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_val_opset_15_10_steps.onnx --local-dir $HOME/app/
 
47
  RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent config.yaml --local-dir $HOME/app/
48
 
49
  RUN huggingface-cli download BSC-LT/vocos-mel-22khz-cat mel_spec_22khz_cat.onnx --local-dir $HOME/app/
 
44
  RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_bal_opset_15_10_steps.onnx --local-dir $HOME/app/
45
  RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_occ_opset_15_10_steps.onnx --local-dir $HOME/app/
46
  RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_val_opset_15_10_steps.onnx --local-dir $HOME/app/
47
+ RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent matcha_multispeaker_cat_cen_opset_15_10_steps.onnx --local-dir $HOME/app/
48
  RUN huggingface-cli download BSC-LT/matcha-tts-cat-multiaccent config.yaml --local-dir $HOME/app/
49
 
50
  RUN huggingface-cli download BSC-LT/vocos-mel-22khz-cat mel_spec_22khz_cat.onnx --local-dir $HOME/app/
infer_onnx.py CHANGED
@@ -36,7 +36,7 @@ def process_text(i: int, text: str, device: torch.device, cleaner:str):
36
 
37
  # paths
38
  MODEL_PATH_MATCHA_MEL_BAL="matcha_multispeaker_cat_bal_opset_15_10_steps.onnx"
39
- #MODEL_PATH_MATCHA_MEL_CAT=hf_hub_download(repo_id="BSC-LT/matcha-tts-cat-multispeaker", filename="matcha_multispeaker_cat_opset_15_10_steps_2399.onnx")
40
  MODEL_PATH_MATCHA_MEL_OCC="matcha_multispeaker_cat_occ_opset_15_10_steps.onnx"
41
  MODEL_PATH_MATCHA_MEL_VAL="matcha_multispeaker_cat_val_opset_15_10_steps.onnx"
42
  MODEL_PATH_VOCOS="mel_spec_22khz_cat.onnx"
@@ -46,7 +46,7 @@ SPEAKER_ID_DICT="spk_to_id_2.json"
46
  # Load models
47
  sess_options = onnxruntime.SessionOptions()
48
  model_matcha_mel_bal = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_BAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
49
- #model_matcha_mel_cat = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_CAT), sess_options=sess_options, providers=["CPUExecutionProvider"])
50
  model_matcha_mel_occ = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_OCC), sess_options=sess_options, providers=["CPUExecutionProvider"])
51
  model_matcha_mel_val = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_VAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
52
  model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
@@ -55,12 +55,14 @@ speaker_id_dict = json.load(open(SPEAKER_ID_DICT))
55
  accents = [e for e in speaker_id_dict.keys()]
56
 
57
  models={"balear":model_matcha_mel_bal,
58
- "occidental": model_matcha_mel_occ,
59
- "valencia": model_matcha_mel_val}
 
60
 
61
  cleaners={"balear": "catalan_balear_cleaners",
62
- "occidental": "catalan_occidental_cleaners",
63
- "valencia": "catalan_valencia_cleaners"}
 
64
 
65
 
66
  speakers = [sp for sp in speaker_id_dict[DEFAULT_ACCENT].keys()]
@@ -197,7 +199,7 @@ description = """
197
 
198
  For vocoders we use [Vocos](https://huggingface.co/BSC-LT/vocos-mel-22khz-cat) trained in a catalan set of ~28 hours.
199
 
200
- [Matcha](https://huggingface.co/BSC-LT/matcha-tts-cat-onnx) was trained using openslr69 and festcat datasets
201
 
202
  """
203
 
 
36
 
37
  # paths
38
  MODEL_PATH_MATCHA_MEL_BAL="matcha_multispeaker_cat_bal_opset_15_10_steps.onnx"
39
+ MODEL_PATH_MATCHA_MEL_CAT="matcha_multispeaker_cat_cen_opset_15_10_steps.onnx"
40
  MODEL_PATH_MATCHA_MEL_OCC="matcha_multispeaker_cat_occ_opset_15_10_steps.onnx"
41
  MODEL_PATH_MATCHA_MEL_VAL="matcha_multispeaker_cat_val_opset_15_10_steps.onnx"
42
  MODEL_PATH_VOCOS="mel_spec_22khz_cat.onnx"
 
46
  # Load models
47
  sess_options = onnxruntime.SessionOptions()
48
  model_matcha_mel_bal = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_BAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
49
+ model_matcha_mel_cat = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_CAT), sess_options=sess_options, providers=["CPUExecutionProvider"])
50
  model_matcha_mel_occ = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_OCC), sess_options=sess_options, providers=["CPUExecutionProvider"])
51
  model_matcha_mel_val = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL_VAL), sess_options=sess_options, providers=["CPUExecutionProvider"])
52
  model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
 
55
  accents = [e for e in speaker_id_dict.keys()]
56
 
57
  models={"balear":model_matcha_mel_bal,
58
+ "nord-occidental": model_matcha_mel_occ,
59
+ "valencia": model_matcha_mel_val,
60
+ "central": model_matcha_mel_cat}
61
 
62
  cleaners={"balear": "catalan_balear_cleaners",
63
+ "nord-occidental": "catalan_occidental_cleaners",
64
+ "valencia": "catalan_valencia_cleaners",
65
+ "central": "catalan_cleaners"}
66
 
67
 
68
  speakers = [sp for sp in speaker_id_dict[DEFAULT_ACCENT].keys()]
 
199
 
200
  For vocoders we use [Vocos](https://huggingface.co/BSC-LT/vocos-mel-22khz-cat) trained in a catalan set of ~28 hours.
201
 
202
+ [Matcha](https://huggingface.co/BSC-LT/matcha-tts-cat-multispeaker) was trained using openslr69 and festcat datasets
203
 
204
  """
205
 
spk_to_id_2.json CHANGED
@@ -7,7 +7,7 @@
7
  "grau": 0,
8
  "elia": 1
9
  },
10
- "occidental":{
11
  "pere": 0,
12
  "emma": 1
13
  },
 
7
  "grau": 0,
8
  "elia": 1
9
  },
10
+ "nord-occidental":{
11
  "pere": 0,
12
  "emma": 1
13
  },