|
import io |
|
from typing import Union |
|
from modules.SentenceSplitter import SentenceSplitter |
|
from modules.SynthesizeSegments import SynthesizeSegments, combine_audio_segments |
|
|
|
from modules import generate_audio as generate |
|
|
|
|
|
from modules.speaker import Speaker |
|
from modules.utils import audio |
|
|
|
|
|
def synthesize_audio(
    text: str,
    temperature: float = 0.3,
    top_P: float = 0.7,
    top_K: float = 20,
    spk: Union[int, Speaker] = -1,
    infer_seed: int = -1,
    use_decoder: bool = True,
    prompt1: str = "",
    prompt2: str = "",
    prefix: str = "",
    batch_size: int = 1,
    spliter_threshold: int = 100,
):
    """Synthesize audio for ``text``, either in one call or in batched segments.

    When ``batch_size`` is exactly 1 the text is handed unchanged to
    ``generate.generate_audio`` and that call's result is returned as-is.
    For any other ``batch_size`` the text is split with ``SentenceSplitter``,
    each piece is synthesized through ``SynthesizeSegments``, the resulting
    segments are merged with ``combine_audio_segments`` and the merged audio
    is converted with ``audio.pydub_to_np``.

    Args:
        text: Input text to synthesize.
        temperature, top_P, top_K, spk, infer_seed, use_decoder, prompt1,
            prompt2, prefix: Forwarded untouched as generation parameters
            (as keyword arguments on the single-call path, and inside each
            segment's ``"params"`` dict on the batched path).
        batch_size: ``1`` selects the single-call path; any other value is
            passed to the ``SynthesizeSegments`` constructor.
        spliter_threshold: Passed to the ``SentenceSplitter`` constructor;
            only used on the batched path.

    Returns:
        The result of ``generate.generate_audio`` on the single-call path,
        otherwise the result of ``audio.pydub_to_np`` on the combined audio.
        NOTE(review): the two paths presumably return the same shape of
        value -- confirm against both helpers.
    """
    # Generation settings shared by both paths; key order mirrors the
    # signature so per-segment dicts are laid out as "text" first, then these.
    generation_params = {
        "temperature": temperature,
        "top_P": top_P,
        "top_K": top_K,
        "spk": spk,
        "infer_seed": infer_seed,
        "use_decoder": use_decoder,
        "prompt1": prompt1,
        "prompt2": prompt2,
        "prefix": prefix,
    }

    # Single-call path: no splitting, no batching.
    if batch_size == 1:
        return generate.generate_audio(text, **generation_params)

    # Batched path: split first, then describe every piece as its own
    # segment carrying a private copy of the generation settings.
    pieces = SentenceSplitter(spliter_threshold).parse(text)

    segment_specs = []
    for piece in pieces:
        segment_specs.append(
            {
                "text": piece,
                "params": {"text": piece, **generation_params},
            }
        )

    batch_synthesizer = SynthesizeSegments(batch_size)
    rendered_segments = batch_synthesizer.synthesize_segments(segment_specs)

    merged = combine_audio_segments(rendered_segments)
    return audio.pydub_to_np(merged)
|
|