|
from transformers import AutoProcessor, MusicgenForConditionalGeneration |
|
import torch |
|
|
|
class EndpointHandler:
    """Inference handler for a MusicGen text-to-music model.

    Loads the processor and model once at construction time and exposes
    ``__call__`` as the per-request entry point, following the Hugging Face
    custom-handler convention.
    """

    def __init__(self, model_path="originstory/holisleigh", use_auth_token=None):
        """Load the processor and model.

        Args:
            model_path: Hub repo id (or local path) of the model to load.
            use_auth_token: Optional Hugging Face access token for private repos.
        """
        # Fall back to CPU when no GPU is available so the handler can at
        # least be constructed (the original hard-coded "cuda" and crashed
        # on CPU-only hosts). fp16 is only useful on GPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        dtype = torch.float16 if self.device == "cuda" else torch.float32

        # BUG FIX: the original passed use_auth_token=None here, silently
        # discarding any caller-supplied token.
        self.processor = AutoProcessor.from_pretrained(
            model_path, use_auth_token=use_auth_token
        )
        # BUG FIX: the original hard-coded the repo id here instead of
        # honoring the model_path parameter.
        self.model = MusicgenForConditionalGeneration.from_pretrained(
            model_path,
            torch_dtype=dtype,
            use_auth_token=use_auth_token,
        ).to(self.device)

    def __call__(self, data: dict) -> dict:
        """Generate audio for one request.

        Args:
            data: Request payload with optional keys ``text_prompt``,
                ``vibe``, ``style`` and ``song_reference``.

        Returns:
            ``{"generated_audio": <nested list of floats>}`` on success, or
            ``{"error": <message>}`` on failure (this is a service boundary,
            so all exceptions are converted to an error payload rather than
            propagated).
        """
        try:
            # Default missing keys to "" so the prompt does not contain the
            # literal string "None" (which data.get() would otherwise yield).
            text_prompt = data.get("text_prompt", "")
            vibe = data.get("vibe", "")
            style = data.get("style", "")
            song_reference = data.get("song_reference", "")

            combined_prompt = f"{vibe} {style} version of {song_reference}: {text_prompt}"

            inputs = self.processor(
                text=[combined_prompt], padding=True, return_tensors="pt"
            ).to(self.device)

            # autocast matches the reduced-precision weights on GPU and is a
            # no-op-equivalent safe context on CPU.
            with torch.autocast(self.device):
                audio_output = self.model.generate(**inputs)

            # Move to host memory and convert to plain lists so the payload
            # is JSON-serializable.
            audio_data = audio_output[0].cpu().numpy().tolist()

            return {"generated_audio": audio_data}
        except Exception as e:
            return {"error": str(e)}
|
|
|
|
|
# Instantiate at import time so the model is loaded once per worker process.
# NOTE(review): this downloads/loads the model as a module-import side effect
# and hard-requires a working model environment just to import this file —
# confirm the serving framework expects a module-level `handler` instance
# rather than instantiating EndpointHandler itself.
handler = EndpointHandler()
|
|