from typing import Any, Dict
import logging
import os

import torch
import soundfile as sf
from transformers import AutoTokenizer, AutoModelForTextToWaveform
import cloudinary
import cloudinary.uploader

# Configure logging ONCE. The original called basicConfig twice (DEBUG, then
# WARNING); only the first call has any effect, so the duplicate was dead code.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


class EndpointHandler:
    """Text-to-speech inference endpoint.

    Synthesizes English speech from request text with facebook/mms-tts-eng,
    writes it to ``StoryAudio.wav``, and uploads the file to Cloudinary.
    """

    def __init__(self, path: str = ""):
        # `path` is part of the hosted-endpoint handler contract; the model id
        # is pinned here rather than loaded from `path`.
        self.tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
        self.model = AutoModelForTextToWaveform.from_pretrained("facebook/mms-tts-eng")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Generate speech for the request and upload the resulting WAV.

        Args:
            data: request payload. The text to synthesize is read from the
                conventional ``"inputs"`` key when present; otherwise the
                whole payload's string form is used (the original behavior,
                kept as a backward-compatible fallback).

        Returns:
            The Cloudinary upload-result dict. (BUG FIX: the original
            discarded it — the ``return`` was commented out — and returned
            ``None`` despite the annotation.)
        """
        logger.warning("------input_data-- %s", data)

        # BUG FIX: the original tokenized str(data) — the repr of the entire
        # request dict, braces and key names included — so the model would
        # literally voice the dict repr. Extract the actual text instead.
        if isinstance(data, dict) and data.get("inputs") is not None:
            payload = str(data["inputs"])
        else:
            payload = str(data)
        logger.warning("payload----%s", payload)

        inputs = self.tokenizer(payload, return_tensors="pt")

        # Inference only — no gradients needed.
        with torch.no_grad():
            outputs = self.model(**inputs)

        # Persist the waveform at the model's native sampling rate, then upload.
        sf.write(
            "StoryAudio.wav",
            outputs["waveform"][0].numpy(),
            self.model.config.sampling_rate,
        )
        return uploadGraphFile("StoryAudio.wav")


def uploadGraphFile(fileName: str) -> Dict[str, Any]:
    """Upload ``fileName`` to Cloudinary (folder ``poc-graph``, raw resource).

    SECURITY: the original hard-coded live Cloudinary credentials in source.
    They are kept only as fallbacks for backward compatibility — rotate them
    and supply CLOUDINARY_CLOUD_NAME / CLOUDINARY_API_KEY /
    CLOUDINARY_API_SECRET via the environment instead.

    Returns:
        The upload-result dict from the Cloudinary SDK.
    """
    cloudinary.config(
        cloud_name=os.environ.get("CLOUDINARY_CLOUD_NAME", "dm9tdqvp6"),
        api_key=os.environ.get("CLOUDINARY_API_KEY", "793865869491345"),
        api_secret=os.environ.get("CLOUDINARY_API_SECRET", "0vhdvBoM35IWcO29NyI04Qj1PMo"),
    )
    return cloudinary.uploader.upload(
        fileName, folder="poc-graph", resource_type="raw"
    )