from typing import Dict, Any
# NOTE(review): wildcard import kept as-is; nothing visible in this file uses a
# name from it, but it may have import-time side effects -- confirm before removing.
from textToStoryGeneration import *
import logging
import torch
import soundfile as sf
from transformers import AutoTokenizer, AutoModelForTextToWaveform

# Configure logging once. The file previously called basicConfig three times
# (DEBUG, then ERROR, then WARNING); basicConfig does nothing once the root
# logger already has handlers, so only the first call (DEBUG) could ever take
# effect. The redundant calls are dropped without changing the effective level.
logging.basicConfig(level=logging.DEBUG)


class CustomHandler:
    """Text-to-speech handler that renders request text to a WAV file.

    The tokenizer and model are loaded once at construction time and reused
    for every call.
    """

    # Hugging Face model id used for both the tokenizer and the model.
    MODEL_ID = "facebook/mms-tts-eng"
    # Path of the audio file written (and overwritten) on every call.
    OUTPUT_PATH = "StoryAudio.wav"

    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_ID)
        self.model = AutoModelForTextToWaveform.from_pretrained(self.MODEL_ID)

    def __call__(self, data: Dict[str, Any]) -> str:
        """Synthesize speech for the request and write it to ``OUTPUT_PATH``.

        Args:
            data: Request payload. When it is a dict whose ``"inputs"`` value
                is a string, that string is the text that gets spoken
                (presumably the caller follows the usual ``{"inputs": ...}``
                request shape -- TODO confirm against the caller). Any other
                payload is stringified as a whole, as before.

        Returns:
            The path of the written WAV file (``"StoryAudio.wav"``). The
            annotation previously claimed a dict, but a plain string path has
            always been returned.
        """
        logging.warning("------input_data-- %s", data)

        # Speak the text itself rather than the repr of the whole request
        # dict (which would feed keys, braces and quotes to the tokenizer).
        text = data.get("inputs") if isinstance(data, dict) else None
        payload = text if isinstance(text, str) else str(data)
        logging.warning("payload----%s", payload)

        inputs = self.tokenizer(payload, return_tensors="pt")

        # Inference only: skip gradient tracking while generating the waveform.
        with torch.no_grad():
            outputs = self.model(**inputs)

        # Write the first (only) waveform in the batch at the model's native
        # sampling rate.
        sf.write(
            self.OUTPUT_PATH,
            outputs["waveform"][0].numpy(),
            self.model.config.sampling_rate,
        )
        return self.OUTPUT_PATH