"""Batch text-to-speech inference with ChatTTS.

Loads a locally stored ChatTTS model, samples a random speaker timbre,
synthesizes the configured texts, and saves each result as an MP3 file.
"""
import os
import sys

import torch

import ChatTTS
# Project-local helpers: timestamp formatting and MP3 writing.
from common_test import get_date_time, save_mp3_file

# Make the current working directory importable so `tool.logger` resolves.
now_dir = os.getcwd()
sys.path.append(now_dir)
from tool.logger import get_logger

# torch.compile / dynamo tuning settings recommended for ChatTTS.
torch._dynamo.config.cache_size_limit = 64
torch._dynamo.config.suppress_errors = True
torch.set_float32_matmul_precision('high')

logger = get_logger("api")

# Machine-specific locations — adjust these for your environment.
MODEL_PATH = "D:\\chenjgspace\\ai-model\\chattts"   # local ChatTTS model dir
OUTPUT_PREFIX = "D:\\Download\\"                    # where MP3s are written

# Initialize and load the model; abort if loading fails.
chat = ChatTTS.Chat()
if chat.load(source="custom", custom_path=MODEL_PATH, coef=None):
    print("Models loaded successfully.")
else:
    print("Models load failed.")
    sys.exit(1)

# Text inputs for inference (batching is supported).
texts = [
    "我真的不敢相信,他那么年轻武功居然这么好",
]

# Sampling a random speaker embedding yields a different random timbre on
# every run; persist `rand_spk` if you want to reproduce this voice later.
rand_spk = chat.sample_random_speaker()
print(rand_spk)  # save it for later timbre recovery

params_infer_code = ChatTTS.Chat.InferCodeParams(
    spk_emb=rand_spk,   # sampled speaker embedding
    temperature=.3,     # custom sampling temperature
    top_P=0.7,          # top-P decoding
    top_K=20,           # top-K decoding
)

# Sentence-level manual control: use oral_(0-9), laugh_(0-2), break_(0-7)
# to insert special prosody tokens into the text to synthesize.
params_refine_text = ChatTTS.Chat.RefineTextParams(
    prompt='[oral_2][laugh_0][break_6]',
)

wavs = chat.infer(
    texts,
    params_refine_text=params_refine_text,
    params_infer_code=params_infer_code,
)

# Save each generated waveform as an MP3 under a timestamped name prefix.
prefix_name = OUTPUT_PREFIX + get_date_time()
for index, wav in enumerate(wavs):
    save_mp3_file(wav, index, prefix_name)