File size: 1,662 Bytes

bd4f7b2
 
 
 
 
1129d6c
0af51be
2734ce5
bd4f7b2
 
 
 
 
c2a94ab
bd4f7b2
ce531bd
669b0a4
 
bd4f7b2
 
 
0af51be
bd4f7b2
 
 
 
 
 
 
 
 
 
 
 
 
 
1129d6c
0af51be
5fa79a1
bd4f7b2
 
1129d6c
 
 
 
 
 
 
 
 
a108533

from typing import Dict, Any
import logging
import torch
import soundfile as sf
from transformers import AutoTokenizer, AutoModelForTextToWaveform
import cloudinary.uploader


# Configure logging exactly once.
# logging.basicConfig() does nothing when the root logger already has a
# handler, so a second call with a different level (previously WARNING) was
# silently ignored. Only the first, effective call is kept.
logging.basicConfig(level=logging.DEBUG)




class EndpointHandler():
    """Text-to-speech request handler built on ``facebook/mms-tts-eng``.

    An instance is callable: it turns the text of a request into a waveform,
    writes it to ``StoryAudio.wav`` and uploads that file via
    ``uploadGraphFile``.
    """

    # Hub id of the checkpoint used for both the tokenizer and the model.
    _MODEL_ID = "facebook/mms-tts-eng"

    def __init__(self, path=""):
        """Load the tokenizer and the text-to-waveform model.

        Args:
            path: Accepted for interface compatibility; it is not used, the
                checkpoint is always loaded by its hub id.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(self._MODEL_ID)
        self.model = AutoModelForTextToWaveform.from_pretrained(self._MODEL_ID)

    @staticmethod
    def _extract_text(data) -> str:
        """Return the text to synthesize from a request payload.

        If *data* is a dict with an ``"inputs"`` key, that value is used.
        Otherwise the whole payload is used, as before. Non-string values are
        converted with ``str()``.
        """
        text = data.get("inputs", data) if isinstance(data, dict) else data
        return text if isinstance(text, str) else str(text)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Synthesize speech for a request and upload the resulting audio.

        Args:
            data: Request payload. The text is read from ``data["inputs"]``
                when present; otherwise the stringified payload is used.

        Returns:
            Whatever ``uploadGraphFile`` returns for the uploaded file.
        """
        logging.warning("------input_data-- %s", data)
        # Tokenize only the text, not the repr of the whole request dict
        # (which would feed braces and key names to the model).
        payload = self._extract_text(data)
        logging.warning("payload----%s", payload)
        inputs = self.tokenizer(payload, return_tensors="pt")

        # Generate the waveform; no gradients are needed for inference.
        with torch.no_grad():
            outputs = self.model(**inputs)

        # NOTE(review): the file name is fixed and relative to the working
        # directory, so overlapping calls would overwrite each other's audio.
        waveform = outputs["waveform"][0].cpu().numpy()
        sf.write("StoryAudio.wav", waveform, self.model.config.sampling_rate)

        # Hand the upload result back to the caller instead of discarding it.
        return uploadGraphFile("StoryAudio.wav")
        
def uploadGraphFile(fileName):
    """Upload a local file to Cloudinary and return the upload response.

    Credentials are read from the environment variables
    ``CLOUDINARY_CLOUD_NAME``, ``CLOUDINARY_API_KEY`` and
    ``CLOUDINARY_API_SECRET``. When a variable is unset, the value previously
    hard-coded here is used so existing deployments keep working.

    Args:
        fileName: Path of the file to upload.

    Returns:
        The value returned by ``cloudinary.uploader.upload``.
    """
    import os  # local import: only this function needs it

    # SECURITY: the fallback literals below are credentials committed to
    # source control. Treat them as compromised: rotate them, supply the new
    # values through the environment, then delete these defaults.
    cloudinary.config(
        cloud_name=os.environ.get("CLOUDINARY_CLOUD_NAME", "dm9tdqvp6"),
        api_key=os.environ.get("CLOUDINARY_API_KEY", "793865869491345"),
        api_secret=os.environ.get(
            "CLOUDINARY_API_SECRET", "0vhdvBoM35IWcO29NyI04Qj1PMo"
        ),
    )
    # Upload as a raw (non-image) resource into the "poc-graph" folder.
    result = cloudinary.uploader.upload(fileName, folder="poc-graph", resource_type="raw")
    return result