# RVC-Speakers/speakers/processors/edge_to_voice.py
# Last commit: glide-the - "Add application file" (1f3bd14), 2.78 kB
from typing import Optional, Union, Dict
from speakers.common.registry import registry
from speakers.processors import BaseProcessor, ProcessorData
from io import BytesIO
import logging
import numpy as np
import edge_tts
import asyncio
import nest_asyncio
import util
import librosa
# Module-wide logger; callers may swap it out via set_edge_to_voice_logger().
logger = logging.getLogger('edge_to_voice')


def set_edge_to_voice_logger(l):
    """Install ``l`` as this module's ``logger``.

    The object is stored as-is (no validation), replacing whatever the
    module-level name ``logger`` was previously bound to.

    :param l: the object to bind to the module-level ``logger`` name.
    """
    # Rebind the module global directly instead of using a ``global`` statement.
    globals()['logger'] = l
class EdgeProcessorData(ProcessorData):
    """Input payload for the edge-tts processor.

    :param text: text to synthesize
    :param tts_speaker: speaker id
    :param rate: speech rate
    :param volume: speech volume (how light or heavy the tone is)
    """

    # Text to synthesize.
    text: str
    # Speaker id; EdgeToVoice uses it as an index into its voice list.
    tts_speaker: int
    # Speech rate; passed unchanged to edge_tts.Communicate(rate=...).
    rate: str
    # Speech volume; passed unchanged to edge_tts.Communicate(volume=...).
    volume: str

    @property
    def type(self) -> str:
        """Type of the Message, used for serialization."""
        return "EDGE"
@registry.register_processor("edge_to_voice")
class EdgeToVoice(BaseProcessor):
    """Processor that turns text into audio using edge-tts.

    The list of available voices is fetched once when the processor is
    constructed; ``EdgeProcessorData.tts_speaker`` is used as an index into
    that list. Synthesized MP3 data is converted to WAV by an ``ffmpeg``
    subprocess and then decoded with ``librosa.load``.
    """

    def __init__(self):
        super().__init__()
        # nest_asyncio patches asyncio so run_until_complete() can be re-entered
        # (see the nest_asyncio documentation); it must be applied before the
        # first synchronous call below.
        nest_asyncio.apply()
        self._tts_speakers_list = self._run_sync(edge_tts.list_voices())  # noqa

    @staticmethod
    def _run_sync(coro):
        """Run ``coro`` to completion on the current event loop and return its result."""
        return asyncio.get_event_loop().run_until_complete(coro)

    def __call__(
        self,
        data: EdgeProcessorData
    ):
        """Synthesize ``data.text`` with the voice selected by ``data.tts_speaker``.

        :param data: request carrying the text, speaker index, rate and volume.
        :return: the ``(audio, sample_rate)`` tuple produced by ``librosa.load``.
        :raises RuntimeError: if the text or the speaker is missing, if edge-tts
            yields no audio, or if ffmpeg produces no WAV output.
        """
        if data.text is None:
            raise RuntimeError('Please provide TTS text.')
        if data.tts_speaker is None:
            # The original message wrongly said "text" here (copy-paste slip).
            raise RuntimeError('Please provide TTS speaker.')

        # Drive the coroutine synchronously.
        return self._run_sync(self._call_edge_tts(data=data))

    @property
    def tts_speakers_list(self):
        """Voice list fetched from ``edge_tts.list_voices()`` at construction time."""
        return self._tts_speakers_list

    @classmethod
    def from_config(cls, cfg=None):
        """Build the processor from a configuration object.

        ``cfg`` is only checked for presence; none of its values are read.

        :raises RuntimeError: if ``cfg`` is ``None``.
        """
        if cfg is None:
            raise RuntimeError("from_config cfg is None.")

        return cls()

    def match(self, data: ProcessorData):
        """Return ``True`` when ``"EDGE"`` occurs in ``data.type``."""
        return "EDGE" in data.type

    async def _call_edge_tts(self, data: EdgeProcessorData):
        """Stream speech from edge-tts, convert it to WAV and decode it.

        :param data: request carrying the text, speaker index, rate and volume.
        :return: the ``(audio, sample_rate)`` tuple from ``librosa.load``,
            which is called with its default arguments.
        :raises RuntimeError: if edge-tts yields no audio data or ffmpeg
            produces no output.
        """
        speaker = self._tts_speakers_list[data.tts_speaker]['ShortName']
        tts_com = edge_tts.Communicate(text=data.text, voice=speaker, rate=data.rate, volume=data.volume)

        # Collect the audio chunks and join them once; repeated ``bytes +=``
        # copies the whole buffer on every chunk.
        audio_chunks = [
            chunk['data']
            async for chunk in tts_com.stream()
            if chunk['type'] == 'audio'
        ]
        tts_raw = b''.join(audio_chunks)
        if not tts_raw:
            raise RuntimeError('edge-tts returned no audio data.')

        # Convert the MP3 stream to WAV through ffmpeg (stdin -> stdout).
        ffmpeg_proc = await asyncio.create_subprocess_exec(
            'ffmpeg',
            '-f', 'mp3',
            '-i', '-',
            '-f', 'wav',
            '-loglevel', 'error',
            '-',
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            # Capture diagnostics so a failed conversion can be reported.
            stderr=asyncio.subprocess.PIPE
        )
        tts_wav, ffmpeg_err = await ffmpeg_proc.communicate(tts_raw)
        err_text = ffmpeg_err.decode(errors='replace').strip() if ffmpeg_err else ''

        if not tts_wav:
            # Without this check librosa would fail on an empty buffer with an
            # error that hides the real cause.
            raise RuntimeError(
                f'ffmpeg produced no WAV output (exit code {ffmpeg_proc.returncode}): {err_text}'
            )
        if ffmpeg_proc.returncode != 0 or err_text:
            # Output exists, so keep going, but do not lose the diagnostics.
            logger.warning('ffmpeg exited with code %s: %s', ffmpeg_proc.returncode, err_text)

        return librosa.load(BytesIO(tts_wav))