|
import io |
|
from typing import Generator, Union |
|
|
|
import numpy as np |
|
|
|
from modules import generate_audio as generate |
|
from modules.SentenceSplitter import SentenceSplitter |
|
from modules.speaker import Speaker |
|
|
|
|
|
def synthesize_stream(
    text: str,
    temperature: float = 0.3,
    top_P: float = 0.7,
    top_K: float = 20,
    spk: Union[int, Speaker] = -1,
    infer_seed: int = -1,
    use_decoder: bool = True,
    prompt1: str = "",
    prompt2: str = "",
    prefix: str = "",
    spliter_threshold: int = 100,
    end_of_sentence="",
) -> Generator[tuple[int, np.ndarray], None, None]:
    """Stream synthesized audio for ``text``, one sentence at a time.

    The text is split with ``SentenceSplitter(spliter_threshold)``; each
    resulting sentence (with ``end_of_sentence`` appended) is handed to
    ``generate.generate_audio_stream`` and every item that stream produces
    is re-yielded in order before the next sentence is started.

    Args:
        text: Input text to split and synthesize.
        temperature, top_P, top_K, spk, infer_seed, use_decoder, prompt1,
            prompt2, prefix: Forwarded unchanged to
            ``generate.generate_audio_stream`` for every sentence.
        spliter_threshold: Passed to the ``SentenceSplitter`` constructor.
        end_of_sentence: String appended to each sentence before synthesis.

    Yields:
        ``(sr, wav)`` pairs exactly as produced by the underlying stream
        (presumably sample rate and waveform chunk -- confirm against
        ``generate.generate_audio_stream``).
    """
    # These arguments are identical for every sentence, so build them once.
    shared_kwargs = dict(
        temperature=temperature,
        top_P=top_P,
        top_K=top_K,
        spk=spk,
        infer_seed=infer_seed,
        use_decoder=use_decoder,
        prompt1=prompt1,
        prompt2=prompt2,
        prefix=prefix,
    )

    pieces = SentenceSplitter(spliter_threshold).parse(text)

    for piece in pieces:
        stream = generate.generate_audio_stream(
            text=piece + end_of_sentence,
            **shared_kwargs,
        )
        # Unpack and re-pack each item so callers always receive a 2-tuple.
        for sr_value, wav_chunk in stream:
            yield sr_value, wav_chunk
|
|