|
|
|
|
|
import collections |
|
import os |
|
from typing import Any, Dict |
|
|
|
import onnx |
|
from TTS.tts.configs.vits_config import VitsConfig |
|
from TTS.tts.models.vits import Vits |
|
|
|
|
|
def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Insert key-value metadata entries into an ONNX model file in-place.

    The model is loaded from *filename*, every pair from *meta_data* is
    appended to the model's ``metadata_props`` (values stringified with
    ``str``), and the result is written back to the same path.

    Args:
      filename:
        Path of the ONNX model to modify.
      meta_data:
        Key-value pairs to embed in the model.
    """
    model = onnx.load(filename)

    for key in meta_data:
        prop = model.metadata_props.add()
        prop.key = key
        prop.value = str(meta_data[key])

    onnx.save(model, filename)
|
|
|
|
|
# Maps the two-letter language code supplied via the LANG environment
# variable to the human-readable language name that is embedded in the
# exported ONNX model's metadata ("language" key).
lang_map = {
    "bg": "Bulgarian",
    "bn": "Bangla",
    "cs": "Czech",
    "da": "Danish",
    "de": "German",
    "el": "Greek",
    "es": "Spanish",
    "et": "Estonian",
    "fr": "French",
    "ga": "Irish",
    "fi": "Finnish",
    "hr": "Croatian",
    "hu": "Hungarian",
    "is": "Icelandic",
    "it": "Italian",
    "ka": "Georgian",
    "kk": "Kazakh",
    "lb": "Luxembourgish",
    "lt": "Lithuanian",
    "lv": "Latvian",
    "mt": "Maltese",
    "ne": "Nepali",
    "nl": "Dutch",
    "no": "Norwegian",
    "pl": "Polish",
    "pt": "Portuguese",
    "ro": "Romanian",
    "sl": "Slovenian",
    "sk": "Slovak",
    "sr": "Serbian",
    "sv": "Swedish",
    "sw": "Swahili",
    "tr": "Turkish",
    "uk": "Ukrainian",
    "vi": "Vietnamese",
    "zh": "Chinese",
}
|
|
|
|
|
def main():
    """Export a Coqui VITS model to ONNX, attach metadata, and dump tokens.

    Reads the target language code from the ``LANG`` environment variable,
    loads ``config.json`` and ``model_file.pth`` from the current directory,
    exports ``model.onnx``, embeds metadata into it, and writes the token
    table to ``tokens.txt``.
    """
    # NOTE(review): LANG is also the standard POSIX locale variable, so an
    # unrelated shell default (e.g. "en_US.UTF-8") can leak in here; callers
    # are expected to set it to a two-letter code such as "de" or "fr".
    lang = os.environ.get("LANG", None)
    if not lang:
        print("Please provide the environment variable LANG")
        return

    # Fail early with a clear message instead of a bare KeyError later on.
    if lang not in lang_map:
        supported = ", ".join(sorted(lang_map))
        print(f"Unsupported language code '{lang}'. Supported codes: {supported}")
        return
    language = lang_map[lang]

    config = VitsConfig()
    config.load_json("config.json")

    vits = Vits.init_from_config(config)

    # This export path only supports character-based (non-phoneme) models.
    assert vits.config.use_phonemes is False, vits.config.use_phonemes
    assert vits.config.phonemizer is None, vits.config.phonemizer
    assert vits.config.phoneme_language is None, vits.config.phoneme_language

    vits.load_checkpoint(config, "model_file.pth")
    vits.export_onnx(output_path="model.onnx", verbose=False)

    meta_data = {
        "model_type": "vits",
        "comment": "coqui",
        "language": language,
        "frontend": "characters",
        "add_blank": int(vits.config.add_blank),
        "blank_id": vits.tokenizer.characters.blank_id,
        "n_speakers": vits.config.model_args.num_speakers,
        "use_eos_bos": int(vits.tokenizer.use_eos_bos),
        "bos_id": vits.tokenizer.characters.bos_id,
        "eos_id": vits.tokenizer.characters.eos_id,
        "pad_id": vits.tokenizer.characters.pad_id,
        "sample_rate": int(vits.ap.sample_rate),
    }
    print("meta_data", meta_data)
    add_meta_data(filename="model.onnx", meta_data=meta_data)

    # Tokens whose upper-cased form collides with another token's
    # upper-cased form; writing their upper-case variant to tokens.txt
    # would create duplicate entries.
    all_upper_tokens = [t.upper() for t in vits.tokenizer.characters._char_to_id.keys()]
    duplicate = {
        item
        for item, count in collections.Counter(all_upper_tokens).items()
        if count > 1
    }

    with open("tokens.txt", "w", encoding="utf-8") as f:
        for token, idx in vits.tokenizer.characters._char_to_id.items():
            f.write(f"{token} {idx}\n")

            # Also map the upper-case variant to the same id, but only for
            # cased, single-character tokens without collisions.
            # Fix: the original excluded "BOS" instead of "<BOS>"; harmless
            # in practice because of the length-1 check, corrected here.
            if (
                token not in ("<PAD>", "<EOS>", "<BOS>", "<BLNK>")
                and token.lower() != token.upper()
                and len(token.upper()) == 1
                and token.upper() not in duplicate
            ):
                f.write(f"{token.upper()} {idx}\n")
|
|
|
|
|
# Run the export pipeline only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
|