csukuangfj's picture
update
18a0605
raw
history blame
1.96 kB
#!/usr/bin/env python3
import json
import os
from typing import Any, Dict
import onnx
from iso639 import Lang
def add_meta_data(filename: str, meta_data: Dict[str, Any]):
    """Attach key-value metadata to an ONNX model file, rewriting it in place.

    Args:
      filename:
        Path of the ONNX model. The model is loaded from this path,
        amended, and saved back to the same path.
      meta_data:
        Mapping of metadata keys to values. Every value is stored as
        ``str(value)``.
    """
    onnx_model = onnx.load(filename)
    props = onnx_model.metadata_props

    # One new metadata entry is appended per pair; existing entries are
    # left as they are.
    for name, raw_value in meta_data.items():
        entry = props.add()
        entry.key, entry.value = name, str(raw_value)

    onnx.save(onnx_model, filename)
def generate_tokens():
    """Convert ``phonemes.txt`` into ``tokens.txt`` in the current directory.

    Every non-blank line of ``phonemes.txt`` must contain exactly two
    whitespace-separated fields, ``<id> <token>``. Tokens longer than one
    character are skipped and reported on stdout. The space character is
    then mapped to id 0, and each ``<token> <id>`` pair is written on its
    own line to ``tokens.txt``.

    Raises:
      ValueError: If a non-blank line does not split into exactly two
        fields, or if the id field is not an integer.
    """
    token2id = {}

    # tokens.txt is written as UTF-8 below, so read the input with the same
    # encoding instead of whatever the current locale defaults to.
    with open("phonemes.txt", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            idx, token = line.split()
            if len(token) > 1:
                print(f"skip token {token}")
                continue

            token2id[token] = int(idx)

    # line.split() can never yield a space as a token, so it is added here.
    token2id[" "] = 0

    with open("tokens.txt", "w", encoding="utf-8") as f:
        f.writelines(f"{s} {i}\n" for s, i in token2id.items())

    print("Generated tokens.txt")
def load_config(filename):
    """Read a UTF-8 encoded JSON file and return the decoded object.

    Args:
      filename:
        Path of the JSON file to read.

    Returns:
      The object decoded by ``json`` from the file's contents.
    """
    with open(filename, mode="r", encoding="utf-8") as fh:
        text = fh.read()
    return json.loads(text)
def main():
    """Generate ``tokens.txt`` and add metadata to the selected ONNX model.

    The model is chosen through two environment variables, ``LANG`` and
    ``NAME``: the configuration is read from ``{LANG}-{NAME}.onnx.json``
    and the metadata is added in place to ``{LANG}-{NAME}.onnx``.

    Raises:
      ValueError: If ``LANG`` or ``NAME`` is unset or empty.
    """
    print("generate tokens")
    generate_tokens()

    lang = os.environ.get("LANG")
    name = os.environ.get("NAME")
    print(lang, name)

    # Without this check a missing variable would surface later as a
    # confusing "None-None.onnx.json" file-not-found error.
    if not lang or not name:
        raise ValueError(
            "Environment variables LANG and NAME must both be set "
            f"(got LANG={lang!r}, NAME={name!r})"
        )

    config = load_config(f"{lang}-{name}.onnx.json")

    # Only the part of LANG before the first "_" is used for the lookup.
    lang_iso = Lang(lang.split("_")[0])

    print("add model metadata")
    meta_data = {
        "model_type": "vits",
        "comment": "piper",  # must be piper for models from piper or mimic3
        "language": lang_iso.name,
        "voice": lang_iso.pt1,
        "has_espeak": 1,
        "n_speakers": config["model"]["n_speakers"],
        "sample_rate": config["audio"]["sample_rate"],
    }
    print(meta_data)

    m = f"{lang}-{name}.onnx"
    add_meta_data(m, meta_data)
# Script entry point. The call is unconditional, so main() also runs if this
# file is imported as a module.
main()