import logging
from typing import Dict, List, Any

from transformers import pipeline
import scipy.io.wavfile

logger = logging.getLogger(__name__)


class EndpointHandler:
    """Custom inference handler that turns a text prompt into Bark speech audio.

    The handler loads the ``suno/bark`` checkpoint once at construction time and
    reuses it for every request.
    """

    def __init__(self, path=""):
        """Load the speech-synthesis pipeline.

        Args:
            path: Accepted for compatibility with the handler calling
                convention; it is not used, the model id is fixed to
                ``suno/bark``.
        """
        # Bark is a text-to-speech model. The "text-generation" task does not
        # produce the {"audio", "sampling_rate"} output that __call__ reads,
        # so the pipeline must be created with the text-to-speech task.
        self.synthesiser = pipeline("text-to-speech", "suno/bark")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Synthesise speech for the prompt stored under ``data["inputs"]``.

        Args:
            data: Request payload; must contain a non-empty ``"inputs"`` entry
                holding the text prompt.

        Returns:
            On success, ``{"audio": <bytes>, "sampling_rate": <rate>}`` where
            ``audio`` is the raw buffer returned by the audio array's
            ``tobytes()``. On a synthesis failure, ``{"error": <message>}``.

        Raises:
            ValueError: If ``"inputs"`` is missing or empty.
        """
        text_prompt = data.get("inputs")
        if not text_prompt:
            raise ValueError("Missing required 'inputs' field in request data.")

        try:
            speech = self.synthesiser(
                text_prompt, forward_params={"do_sample": True}
            )
            # Assumes the pipeline returns a NumPy array under "audio"
            # (anything exposing ``tobytes()`` works) -- TODO confirm dtype
            # and shape with the consumer of these bytes.
            audio_data = speech["audio"]
            sampling_rate = speech["sampling_rate"]
            # Raw sample buffer with no container/header; the caller needs
            # the sampling rate (and the dtype) to decode it.
            audio_bytes = audio_data.tobytes()
        except Exception as e:
            # Best-effort boundary: keep reporting the failure in the response
            # body, but log the traceback so it is not lost.
            logger.exception("Speech synthesis failed")
            return {"error": str(e)}

        return {"audio": audio_bytes, "sampling_rate": sampling_rate}