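# NOTE: the lines below appear to be residue from a web file viewer, kept as
# comments so the module remains valid Python.
# CineAI's picture
# Create T2A.py
# 4bb9300 verified
# raw
# history blame
# 748 Bytes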
from io import BytesIO

import soundfile as sf
import torch

from .config import model_mms_tts_eng, tokenizer_mms_tts_eng
# Sample rate (Hz) handed to soundfile when the generated audio is encoded as WAV.
# NOTE(review): presumably matches the output rate of the MMS-TTS model loaded
# in .config — confirm against the model configuration.
SAMPLING_RATE = 16000
class T2A:
    """Text-to-audio helper: tokenizes text once, then synthesizes WAV bytes on call.

    The tokenizer and model objects come from the package's ``config`` module
    (``tokenizer_mms_tts_eng`` / ``model_mms_tts_eng``).
    """

    def __init__(self, input_text: str):
        """Tokenize *input_text* into PyTorch tensors for later synthesis.

        Args:
            input_text: The text to convert to speech.

        Raises:
            ValueError: If *input_text* is ``None``.
        """
        # Validate here: once tokenized, ``self.inputs`` is no longer None, so
        # the guard in ``__call__`` cannot detect a missing input text.
        if input_text is None:
            raise ValueError("Input text is None. Please provide text")
        self.inputs = tokenizer_mms_tts_eng(input_text, return_tensors="pt")

    def __call__(self) -> bytes:
        """Run the model on the stored inputs and return the audio as WAV bytes.

        Returns:
            The WAV-encoded audio, written at ``SAMPLING_RATE``.

        Raises:
            Exception: If ``self.inputs`` is ``None``.
        """
        if self.inputs is None:
            raise Exception("Input text is None. Please provide text")

        # Inference only: no gradients are needed.
        with torch.no_grad():
            output_model = model_mms_tts_eng(**self.inputs)

        # First item of the "audio" entry.
        # NOTE(review): presumably the waveform of the single input — confirm
        # against the model's output structure.
        audio = output_model["audio"][0]
        if isinstance(audio, torch.Tensor):
            # soundfile expects array-like data on the host; make that explicit.
            audio = audio.detach().cpu().numpy()

        # Encode into an in-memory buffer instead of touching the filesystem.
        with BytesIO() as buffer:
            sf.write(buffer, audio, SAMPLING_RATE, format='wav')
            return buffer.getvalue()