from transformers import AutoProcessor, MusicgenForConditionalGeneration
import torch


class EndpointHandler:
    def __init__(self, model_path="originstory/holisleigh", use_auth_token=None):
        # Load the processor and model from the same path so they stay in sync,
        # forwarding the auth token to both calls
        self.processor = AutoProcessor.from_pretrained(
            model_path, use_auth_token=use_auth_token
        )
        self.model = MusicgenForConditionalGeneration.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            use_auth_token=use_auth_token,
        ).to("cuda")

    def __call__(self, data: dict) -> dict:
        """
        Args:
            data (dict): Contains the text prompt, vibe, style, and public
                domain song reference.
        """
        try:
            # Extract user inputs
            text_prompt = data.get("text_prompt")
            vibe = data.get("vibe")
            style = data.get("style")
            song_reference = data.get("song_reference")

            # Combine user inputs into a single conditioning prompt
            combined_prompt = (
                f"{vibe} {style} version of {song_reference}: {text_prompt}"
            )

            # Tokenize the prompt and move the tensors to the GPU
            inputs = self.processor(
                text=[combined_prompt], padding=True, return_tensors="pt"
            ).to("cuda")

            # Generate music under autocast to match the fp16 weights
            with torch.autocast("cuda"):
                audio_output = self.model.generate(**inputs)

            # Convert the waveform tensor to a JSON-serializable list
            audio_data = audio_output[0].cpu().numpy().tolist()

            # Return generated music
            return {"generated_audio": audio_data}
        except Exception as e:
            # Surface errors to the caller instead of crashing the endpoint
            return {"error": str(e)}


# Example usage
handler = EndpointHandler()
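
# A minimal local-usage sketch, assuming a CUDA GPU is available and that
# numpy and scipy are installed. The payload values and the output filename
# are illustrative, not part of the handler contract; the payload keys match
# those read in __call__ above.
import numpy as np
import scipy.io.wavfile

payload = {
    "text_prompt": "sleigh bells over a warm synth pad",
    "vibe": "cozy",
    "style": "lo-fi",
    "song_reference": "Jingle Bells",
}

result = handler(payload)
if "generated_audio" in result:
    # MusicGen exposes its output sampling rate on the audio encoder config
    sampling_rate = handler.model.config.audio_encoder.sampling_rate
    # Rebuild the waveform array and drop the channel dimension for mono audio
    waveform = np.array(result["generated_audio"], dtype=np.float32).squeeze()
    scipy.io.wavfile.write("output.wav", rate=sampling_rate, data=waveform)
else:
    print(result["error"])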