CineAI's picture
v0.0.1_DigItBMTH
d5436e0
raw
history blame
1.67 kB
import logging
from io import BytesIO
from typing import Optional
import librosa
import soundfile as sf
from streamlit_TTS import auto_play, text_to_audio
from .config import pipe_tts
# Sample rate (samples per second) passed to soundfile when writing the WAV
# and to librosa when reloading it to measure the duration.
SAMPLING_RATE = 16_000
class T2A:
    """Text-to-audio helper around ``pipe_tts`` and ``streamlit_TTS``.

    The constructor stores the text and immediately runs it through
    ``pipe_tts``; :meth:`get_audio` turns that result into WAV bytes, while
    :meth:`autoplay` speaks the text through ``streamlit_TTS`` instead.
    """

    def __init__(self, input_text: Optional[str] = None):
        """Store *input_text* and run it through ``pipe_tts``.

        Args:
            input_text: Text to synthesize.
        """
        self.text = input_text
        # NOTE(review): pipe_tts is called even when input_text is None (the
        # default); confirm the pipeline tolerates that input.
        self.output_model = pipe_tts(input_text)

    def __get_duration(self, raw: bytes) -> float:
        """Return the duration of the encoded audio in *raw*, via librosa.

        Args:
            raw: Encoded audio bytes (``get_audio`` passes a WAV file image).
        """
        chunk = BytesIO(raw)
        audio, sample_rate = librosa.load(chunk, sr=SAMPLING_RATE)
        return librosa.get_duration(y=audio, sr=sample_rate)

    def autoplay(self, lang: str = "en") -> None:
        """Speak the stored text through ``streamlit_TTS``.

        If the stored text is not a string, a message describing the type
        problem is spoken instead of raising.

        Args:
            lang: Language code forwarded to ``text_to_audio``.

        Raises:
            ValueError: If the stored text is ``None``.
        """
        if self.text is None:
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError("Text is None")

        if isinstance(self.text, str):
            message = self.text
        else:
            # Tell the user about the wrong type audibly rather than failing.
            message = f"Text you provide is {type(self.text)} accepted only string type"

        auto_play(text_to_audio(message, language=lang))

    def get_audio(self) -> Optional[tuple[bytes, int, float]]:
        """Encode the synthesized audio as WAV.

        Returns:
            ``(wav_bytes, SAMPLING_RATE, duration)`` on success, or ``None``
            when anything goes wrong; the failure is logged with its
            traceback.
        """
        try:
            # presumably a 1-D waveform for the first (only) utterance;
            # verify against what pipe_tts actually returns.
            synth = self.output_model["audio"][0]
            logging.debug("synth : %s", synth)

            # NOTE(review): the rate is hard-coded; confirm it matches the
            # rate the TTS model actually produces.
            with BytesIO() as buffer:
                sf.write(buffer, synth, SAMPLING_RATE, format="wav")
                output = buffer.getvalue()  # bytes copy, valid after close
            logging.debug("type : %s", type(output))

            duration = self.__get_duration(output)
            logging.debug("duration : %s", duration)
            return output, SAMPLING_RATE, duration
        except Exception:
            # Best-effort boundary: keep returning None to callers, but do
            # not lose the traceback.
            logging.exception("Failed to render synthesized audio")
            return None