Spaces:

chenmgtea
/

chat-tts

Running

File size: 1,892 Bytes

214ea91

# Import necessary libraries and configure settings

import torch
import ChatTTS
import os,sys
from common_test import  *

now_dir = os.getcwd()
sys.path.append(now_dir)
from tool.logger import get_logger


# TorchDynamo settings: raise the compile-cache limit and suppress compile
# errors rather than letting them abort the run.
torch._dynamo.config.cache_size_limit = 64
torch._dynamo.config.suppress_errors = True
# Allow the faster, reduced-precision float32 matmul mode.
torch.set_float32_matmul_precision('high')

logger = get_logger("api")

# Initialize and load the model.
# The weights directory used to be hard-coded to a single machine's path. It
# can now be overridden with the CHATTTS_MODEL_PATH environment variable; when
# the variable is unset or empty the original location is used, so existing
# setups keep working unchanged.
model_path = (
    os.environ.get("CHATTTS_MODEL_PATH")
    or "D:\\chenjgspace\\ai-model\\chattts"
)
chat = ChatTTS.Chat()
if chat.load(source="custom", custom_path=model_path, coef=None):
    print("Models loaded successfully.")
else:
    # Nothing below can work without a loaded model, so stop with a
    # non-zero exit status.
    print("Models load failed.")
    sys.exit(1)

# Text input for inference; a list is used so several sentences can be batched.
texts = ["我真的不敢相信，他那么年轻武功居然这么好"]

# Sampling a random speaker means every run produces audio with a different,
# random timbre.
rand_spk = chat.sample_random_speaker()
print(rand_spk)  # keep this value if the same timbre must be restored later

# Inference parameters: the sampled speaker plus the decoding settings.
params_infer_code = ChatTTS.Chat.InferCodeParams(
    spk_emb=rand_spk,  # speaker sampled above
    temperature=0.3,   # custom temperature
    top_P=0.7,         # top-P decoding
    top_K=20,          # top-K decoding
)

###################################
# Sentence-level manual control.
#
# The special tokens oral_(0-9), laugh_(0-2) and break_(0-7) can be placed in
# the prompt to be generated into the text that gets synthesized.
params_refine_text = ChatTTS.Chat.RefineTextParams(prompt='[oral_2][laugh_0][break_6]')

# Run inference over all input texts with both parameter objects.
wavs = chat.infer(
    texts,
    params_infer_code=params_infer_code,
    params_refine_text=params_refine_text,
)


# Alternatives that are currently disabled, kept for reference:
#   default inference : wavs = chat.infer(texts)
#   inline playback   : Audio(wavs[0], rate=24_000, autoplay=True)
#   single WAV export : torchaudio.save("D:\\Download\\output.wav", torch.from_numpy(wavs[0]), 24000)

# Save the generated audio: every clip is handed to save_mp3_file together
# with its position in the batch and a shared prefix built from
# get_date_time().
prefix_name = "D:\\Download\\" + get_date_time()

for clip_index, clip in enumerate(wavs):
    save_mp3_file(clip, clip_index, prefix_name)