Spaces:

CineAI
/

Chelsea

Sleeping

File size: 1,013 Bytes

9b0d264
 
4bb9300
3a802c4
4bb9300
9b0d264
0fb503b
9b0d264
4bb9300
 
 
 
 
0fb503b
4bb9300
6298db6
4ac82ef
6298db6
 
 
 
5e83c71
9b0d264
0fb503b
06fe464
0fb503b
06fe464
4bb9300
0fb503b
4bb9300
 
6298db6
 
b3fbe5e
6298db6
 
06fe464
6298db6
9b0d264
b3fbe5e

import logging

import torch
import librosa
import soundfile as sf

from io import BytesIO
from .config import pipe_tts

# Sample rate in Hz. T2A.get_audio writes the WAV with this rate and returns it
# to the caller; the duration helper decodes the WAV at the same rate.
SAMPLING_RATE = 16000

class T2A:
    """Text-to-audio wrapper around the project's ``pipe_tts`` pipeline.

    The pipeline is invoked once, at construction time. ``get_audio`` then
    packages the stored result as in-memory WAV bytes together with the
    sample rate and the clip duration.
    """

    def __init__(self, input_text: str):
        """Run ``pipe_tts`` on *input_text* and keep its raw output.

        Args:
            input_text: Text handed to ``pipe_tts`` unchanged.
        """
        # Only the "audio" entry of this output is read later (in get_audio).
        self.output_model = pipe_tts(input_text)

    def __get_duration(self, raw: bytes) -> float:
        """Return the duration reported by librosa for encoded audio bytes.

        Args:
            raw: A complete encoded audio file held in memory (get_audio
                passes WAV bytes).

        Returns:
            The value of ``librosa.get_duration`` for the decoded signal.
        """
        chunk = BytesIO(raw)
        # Decode at SAMPLING_RATE so the duration is measured at the same rate
        # the bytes were written with.
        audio, sample_rate = librosa.load(chunk, sr=SAMPLING_RATE)
        return librosa.get_duration(y=audio, sr=sample_rate)

    def get_audio(self):
        """Encode the synthesized audio as WAV bytes.

        Returns:
            A ``(wav_bytes, SAMPLING_RATE, duration)`` tuple on success, or
            ``None`` if any step fails. Failures are logged (with traceback)
            instead of being raised, so callers must check for ``None``
            before unpacking the result.
        """
        try:
            synth = self.output_model["audio"][0]
            logging.debug("synth : %s", synth)

            # NOTE(review): the WAV is stamped with the fixed module-level
            # SAMPLING_RATE, not a rate reported by the pipeline -- confirm
            # the model behind pipe_tts really produces audio at that rate.
            with BytesIO() as buffer:
                sf.write(buffer, synth, SAMPLING_RATE, format='wav')
                # Must be read before the buffer is closed by the with-block.
                output = buffer.getvalue()  # bytes

            logging.debug("type : %s", type(output))

            duration = self.__get_duration(output)
            logging.debug("duration : %s", duration)
        except Exception as e:
            # Best-effort contract kept from the original: never raise.
            # logging.exception records the traceback, which a bare
            # logging.error(e) loses (a KeyError would log only "'audio'").
            logging.exception("T2A.get_audio failed: %s", e)
            return None

        return output, SAMPLING_RATE, duration