import logging | |
import torch | |
import librosa | |
import soundfile as sf | |
from io import BytesIO | |
from .config import pipe_tts | |
# Sample rate (Hz) used both when encoding the synthesized samples as WAV and
# when decoding that WAV again to measure its duration.
# NOTE(review): the rate is hard-coded rather than read from the pipeline
# output -- confirm that pipe_tts really produces 16 kHz audio.
SAMPLING_RATE = 16000


class T2A:
    """Text-to-audio helper built on the project's ``pipe_tts`` callable.

    ``pipe_tts`` is invoked once, at construction time; ``get_audio`` then
    turns the stored result into in-memory WAV bytes.
    """

    def __init__(self, input_text: str):
        """Run ``pipe_tts`` on *input_text* and keep whatever it returns."""
        self.output_model = pipe_tts(input_text)

    def __get_duration(self, raw: bytes):
        """Return the duration, in seconds, of the encoded audio in *raw*.

        The bytes are decoded with ``librosa.load`` at ``SAMPLING_RATE`` and
        the length is measured with ``librosa.get_duration``.
        """
        # librosa expects a path or file-like object, so wrap the bytes.
        audio, sample_rate = librosa.load(BytesIO(raw), sr=SAMPLING_RATE)
        return librosa.get_duration(y=audio, sr=sample_rate)

    def get_audio(self):
        """Encode the synthesized audio as WAV and report its duration.

        Returns:
            A ``(wav_bytes, SAMPLING_RATE, duration)`` tuple on success, or
            ``None`` when any step fails. Failures are logged with their
            traceback instead of being raised, so callers must check for
            ``None``.
        """
        try:
            # First entry of the pipeline's "audio" field.
            # NOTE(review): presumably a 1-D array of samples -- confirm
            # against the pipe_tts output format.
            synth = self.output_model["audio"][0]
            # Lazy %-args: the (possibly large) array is only formatted when
            # DEBUG logging is actually enabled.
            logging.debug("synth : %s", synth)

            with BytesIO() as buffer:
                sf.write(buffer, synth, SAMPLING_RATE, format="wav")
                # Copy the bytes out before the buffer is closed.
                output = buffer.getvalue()
            logging.debug("type : %s", type(output))

            duration = self.__get_duration(output)
            logging.debug("duration : %s", duration)
            return output, SAMPLING_RATE, duration
        except Exception as e:
            # Best-effort boundary kept from the original behaviour: report
            # the failure (now with traceback) and signal it with None.
            logging.exception(e)
            return None