# chat-tts/test/api.py
# Author: chenjgtea (commit 214ea91)
# Import necessary libraries and configure settings.
import os
import sys

# Make project-local modules importable BEFORE importing them; the
# original appended to sys.path only after `from common_test import *`,
# which breaks when the script is launched from another directory.
now_dir = os.getcwd()
sys.path.append(now_dir)

import torch

import ChatTTS
from common_test import *
from tool.logger import get_logger

# Allow more compiled-graph cache entries and fall back to eager mode on
# dynamo compilation errors; trade a little float32 matmul precision
# for TensorCore speed.
torch._dynamo.config.cache_size_limit = 64
torch._dynamo.config.suppress_errors = True
torch.set_float32_matmul_precision('high')

logger = get_logger("api")
# Initialize and load the model from a local checkpoint directory.
chat = ChatTTS.Chat()
if chat.load(source="custom", custom_path="D:\\chenjgspace\\ai-model\\chattts", coef=None):
    # Use the module logger instead of bare print() — a logger is
    # already configured above but was previously unused.
    logger.info("Models loaded successfully.")
else:
    logger.error("Models load failed.")
    sys.exit(1)

# Text inputs for inference (batching is supported: one wav per text).
texts = [
    "我真的不敢相信,他那么年轻武功居然这么好",
]

# Sampling a random speaker embedding gives a different timbre on every
# run; log rand_spk so the same voice can be recovered later.
rand_spk = chat.sample_random_speaker()
logger.info("sampled speaker embedding: %s", rand_spk)

params_infer_code = ChatTTS.Chat.InferCodeParams(
    spk_emb=rand_spk,   # use the sampled speaker timbre
    temperature=.3,     # lower temperature -> more stable output
    top_P=0.7,          # nucleus (top-P) sampling threshold
    top_K=20,           # top-K decode
)

###################################
# For sentence-level manual control, use oral_(0-9), laugh_(0-2),
# break_(0-7) to inject special tokens into the text to synthesize.
params_refine_text = ChatTTS.Chat.RefineTextParams(
    prompt='[oral_2][laugh_0][break_6]',
)

# Perform inference; returns one waveform (numpy array) per input text.
wavs = chat.infer(
    texts,
    params_refine_text=params_refine_text,
    params_infer_code=params_infer_code,
)

# Save each generated waveform as an mp3 under a timestamped prefix.
# get_date_time/save_mp3_file come from common_test (star import).
prefix_name = "D:\\Download\\" + get_date_time()
for index, wav in enumerate(wavs):
    save_mp3_file(wav, index, prefix_name)