Spaces:

CineAI
/

Chelsea

Sleeping

File size: 1,590 Bytes

9b0d264
 
4bb9300
3a802c4
4bb9300
9b0d264
0fb503b
9b0d264
4bb9300
d2150bd
 
4bb9300
 
 
be26bab
 
0fb503b
4bb9300
6298db6
4ac82ef
6298db6
 
 
 
be26bab
 
 
 
d2150bd
 
be26bab
d2150bd
 
 
 
 
5e83c71
9b0d264
0fb503b
06fe464
0fb503b
06fe464
4bb9300
0fb503b
4bb9300
 
6298db6
 
b3fbe5e
6298db6
 
06fe464
6298db6
9b0d264
b3fbe5e

import logging

import torch
import librosa
import soundfile as sf

from io import BytesIO
from .config import pipe_tts

from streamlit_TTS import auto_play, text_to_audio

# Sample rate passed to soundfile when encoding the synthesized waveform as
# WAV, passed to librosa when reloading those bytes to measure duration, and
# returned to callers of T2A.get_audio().
# NOTE(review): assumes the TTS pipeline emits audio at this rate — confirm
# against the model configured in .config.
SAMPLING_RATE = 16000

class T2A:
    """Text-to-audio helper: synthesizes speech and plays or exports it.

    The text is run through ``pipe_tts`` when the instance is constructed;
    ``get_audio`` converts that result to WAV bytes, while ``autoplay``
    speaks the text through ``streamlit_TTS`` independently of the pipeline.
    """

    def __init__(self, input_text: str = None):
        """Store *input_text* and run the TTS pipeline on it.

        Args:
            input_text: Text to synthesize. It is forwarded to ``pipe_tts``
                unchanged, including when it is ``None``.
        """
        self.text = input_text
        # Synthesis happens eagerly here, so constructing the object is as
        # expensive as the pipeline call itself.
        self.output_model = pipe_tts(input_text)

    def __get_duration(self, raw: bytes):
        """Return the duration of encoded audio *raw* as reported by librosa.

        Args:
            raw: Encoded audio bytes (``get_audio`` passes WAV data).

        Returns:
            The value of ``librosa.get_duration`` for the decoded signal,
            loaded at ``SAMPLING_RATE``.
        """
        audio, sample_rate = librosa.load(BytesIO(raw), sr=SAMPLING_RATE)
        return librosa.get_duration(y=audio, sr=sample_rate)

    def autoplay(self, lang: str = "en") -> None:
        """Speak the stored text in the Streamlit app.

        If the stored text is not a string, a spoken warning naming the
        offending type is played instead.

        Args:
            lang: Language code forwarded to ``text_to_audio``.

        Raises:
            ValueError: If the stored text is ``None``.
        """
        if self.text is None:
            # ValueError is a subclass of Exception, so callers that caught
            # the previous generic Exception keep working.
            raise ValueError("Text is None")

        if isinstance(self.text, str):
            message = self.text
        else:
            message = f"Text you provide is {type(self.text)} accepted only string type"

        # Both branches must use the ``lang`` parameter; the warning branch
        # previously referenced an undefined name and raised NameError.
        auto_play(text_to_audio(message, language=lang))

    def get_audio(self):
        """Encode the synthesized audio as WAV bytes.

        Returns:
            A tuple ``(wav_bytes, SAMPLING_RATE, duration)`` on success, or
            ``None`` if any step fails (the error is logged with its
            traceback rather than propagated).
        """
        try:
            # NOTE(review): presumably the first waveform of the pipeline
            # output — confirm against the pipe_tts result schema.
            synth = self.output_model["audio"][0]
            logging.debug("synth : %s", synth)

            with BytesIO() as buffer:
                sf.write(buffer, synth, SAMPLING_RATE, format='wav')
                # Copy the bytes out before the buffer is closed.
                output = buffer.getvalue()
            logging.debug("type : %s", type(output))

            duration = self.__get_duration(output)
            logging.debug("duration : %s", duration)

            return output, SAMPLING_RATE, duration
        except Exception:
            # Best-effort by design: report the failure and signal it to the
            # caller with None instead of raising.
            logging.exception("Failed to convert synthesized audio to WAV")
            return None