from modules.SentenceSplitter import SentenceSplitter
from modules.normalization import text_normalize
from modules import generate_audio as generate
import numpy as np
from modules.speaker import Speaker


def synthesize_audio(
    text: str,
    temperature: float = 0.3,
    top_P: float = 0.7,
    top_K: float = 20,
    spk: int | Speaker = -1,
    infer_seed: int = -1,
    use_decoder: bool = True,
    prompt1: str = "",
    prompt2: str = "",
    prefix: str = "",
    batch_size: int = 1,
    spliter_threshold: int = 100,
):
    """Generate audio for ``text`` through the single or the batched generator.

    When ``batch_size == 1`` the text is handed as-is to
    ``generate.generate_audio`` and that call's result is returned unchanged.

    For any other ``batch_size`` the text is split by
    ``SentenceSplitter(spliter_threshold)``, every piece is passed through
    ``text_normalize``, and the pieces are sent together to
    ``generate.generate_audio_batch``. The per-piece audio arrays are then
    joined with ``np.concatenate`` in the order they were returned.

    Args:
        text: Input text to synthesize.
        temperature, top_P, top_K, spk, infer_seed, use_decoder, prompt1,
            prompt2, prefix: Forwarded unchanged, by keyword, to whichever
            generator function is used. Their meaning is defined there.
        batch_size: Only selects the code path (``1`` -> single call,
            anything else -> split and batch). The value itself is not
            forwarded to the batch generator.
        spliter_threshold: Constructor argument for ``SentenceSplitter``;
            only used on the batched path.

    Returns:
        On the batched path, a ``(sample_rate, audio_data)`` tuple where
        ``sample_rate`` is taken from the first batch item and ``audio_data``
        is the concatenation of every item's data. On the single path,
        whatever ``generate.generate_audio`` returns (presumably the same
        kind of pair -- confirm against that function).

    NOTE(review): the single path does not call ``text_normalize`` here;
    confirm whether ``generate.generate_audio`` normalizes internally.
    """
    # Options shared verbatim by both generator entry points.
    generation_options = dict(
        temperature=temperature,
        top_P=top_P,
        top_K=top_K,
        spk=spk,
        infer_seed=infer_seed,
        use_decoder=use_decoder,
        prompt1=prompt1,
        prompt2=prompt2,
        prefix=prefix,
    )

    # Guard clause: a single-request call needs no splitting or stitching.
    if batch_size == 1:
        return generate.generate_audio(text, **generation_options)

    splitter = SentenceSplitter(spliter_threshold)
    normalized_segments = [
        text_normalize(segment) for segment in splitter.parse(text)
    ]

    batch_results = generate.generate_audio_batch(
        texts=normalized_segments,
        **generation_options,
    )

    # Each batch item is unpacked as a (sample_rate, data) pair; the first
    # item's rate is used for the whole output.
    # NOTE(review): an empty batch result would fail at this index lookup.
    first_result = batch_results[0]
    sample_rate = first_result[0]

    audio_chunks = [chunk for _, chunk in batch_results]
    audio_data = np.concatenate(audio_chunks)
    return sample_rate, audio_data