gpt-sovits / Synthesizers /remote /Remote_Synthesizer.py
nekoaoxiang
添加代码至 df4c937
558c90a
import io, wave
import os, json, sys
import threading
from Synthesizers.base import Base_TTS_Synthesizer ,load_config
from .remote_task import Remote_TTS_Task as TTS_Task, set_based_synthesizer, get_ui_config
import requests
from urllib import parse
from datetime import datetime
from typing import Union, Generator, Tuple, Any, Optional, Dict, Literal
import numpy as np
import soundfile as sf
class Remote_Synthesizer(Base_TTS_Synthesizer):
url :str = "http://127.0.0.1:5000"
tts_endpoint:str = "/tts"
character_endpoint:str = "/character_list"
based_synthesizer :str = "gsv_fast"
class Config:
extra = "ignore"
def __init__(self, config_path:str = None, **kwargs):
super().__init__(**kwargs)
if config_path is None:
config_path = os.path.join(os.path.dirname(__file__), "configs", "config.json")
config_dict = load_config(config_path)
config_dict.update(kwargs)
for key, value in config_dict.items():
if hasattr(self, key):
setattr(self, key, value)
set_based_synthesizer(self.based_synthesizer)
self.ui_config = get_ui_config(self.based_synthesizer)
def get_characters(self)-> dict:
url = self.url + self.character_endpoint
res = requests.get(url)
return json.loads(res.text)
@staticmethod
def stream_audio(url, data: Dict[str, Any]) -> Generator[Tuple[int, np.ndarray], None, None]:
headers = {"Content-Type": "application/json"}
# 发起POST请求,获取响应流
response = requests.post(
url, data=json.dumps(data), headers=headers, stream=True
)
chunk_size = 1024
# 确保请求成功
if response.status_code == 200:
# 循环读取音频流
for chunk in response.iter_content(chunk_size):
# 将二进制数据转换为numpy数组,这里假设音频数据是16位整数格式
audiodata = np.frombuffer(chunk, dtype=np.int16)
yield 32000, audiodata
else:
raise Exception(
f"Failed to get audio stream, status code: {response.status_code}"
)
def generate(
self,
task: TTS_Task,
return_type: Literal["filepath", "numpy"] = "numpy",
save_path: Optional[str] = None,
) -> Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]:
url = self.url + self.tts_endpoint
data = task.data
print(return_type)
if self.debug_mode:
print(f"generate task: \n{data}")
headers = {"Content-Type": "application/json"}
if return_type == "filepath" or (
return_type == "numpy" and not task.stream
):
if save_path is None:
save_path = f"tmp_audio/{datetime.now().strftime('%Y%m%d%H%M%S')}.wav"
res = requests.post(url, data=json.dumps(data), headers=headers)
if res.status_code == 200:
with open(save_path, "wb") as f:
f.write(res.content)
if return_type == "filepath":
return save_path
else:
audiodata, sr = sf.read(save_path)
return ((sr, audiodata) for _ in range(1))
else:
raise Exception(f"remote synthesizer error: {res.text}")
elif return_type == "numpy" and task.stream:
return self.stream_audio(url, data)
def params_parser(self, data) -> TTS_Task:
task = TTS_Task(based_synthesizer=self.based_synthesizer, **data)
return task
def ms_like_parser(self,data) -> TTS_Task:
task = TTS_Task(based_synthesizer=self.based_synthesizer, **data)
return task