doodle-musegen

Runtime error

App Files Files Community

supermomo668 committed on Aug 6, 2023

Commit

692312c

•

1 Parent(s): 1ccfe17

handler

Browse files

Files changed (1) hide show

handler.py +110 -0

handler.py ADDED Viewed

	@@ -0,0 +1,110 @@

+from typing import Dict, List, Any
+from datasets import load_dataset
+from transformers import AutoProcessor, MusicgenForConditionalGeneration
+import torch, numpy as np
+import io
+import soundfile as sf
+from audiocraft.models import MusicGen
+import yaml
+import math
+import torchaudio
+import torch
+from audiocraft.utils.notebook import display_audio
+def get_bip_bip(
+	bip_duration=0.125, frequency=440, duration=0.5, sample_rate=32000, device="cuda"):
+    """Generates a series of bip bip at the given frequency."""
+    t = torch.arange(
+        int(duration * sample_rate), device="cuda", dtype=torch.float) / sample_rate
+    wav = torch.cos(2 * math.pi * 440 * t)[None]
+    tp = (t % (2 * bip_duration)) / (2 * bip_duration)
+    envelope = (tp >= 0.5).float()
+    return wav * envelope
+def load_conf(conf):
+  with open(conf,'r') as f:
+    conf= yaml.safeload(f)
+  return conf
+class generator:
+	def __init__(self, conf_file):
+		"""
+		conf{
+			model
+			sampling_rate
+		}
+		"""
+		self.conf = load_conf(conf_file)
+		self.processor = AutoProcessor.from_pretrained(self.conf['model'])
+		self.model = MusicGen.get_pretrained(self.conf['model'])
+		self.model.set_generation_params(
+			use_sampling=True,
+			top_k=250,
+			duration=self.conf['duration']
+		)
+		device = "cuda" if torch.cuda.is_available() else "cpu"
+		self.model.to(device)
+		self.sampling_rate = self.model.config.audio_encoder.sampling_rate
+	def preprocess(self, text, audio):
+		audio = audio[: int(len(audio) // self.conf['nth_slice_prompt'])]
+	def generate(self, text:list, audio: np.array, **kwargs):
+		"""
+		text: ["modern melodic electronic dance music", "80s blues track with groovy saxophone"]
+		audio (np.array)
+		"""
+		# inputs = self.processor(
+		# 	audio=audio,
+		# 	sampling_rate=self.conf["sampling_rate"],
+		# 	text=text,
+		# 	padding=True,
+		# 	return_tensors="pt",
+		# )
+		output = self.model.generate_with_chroma(
+			descriptions=[
+				text
+			],
+			melody_wavs=audio,
+			melody_sample_rate=self.conf['sampling_rate'],
+			progress=True
+		)
+		return output
+class EndpointHandler:
+	def __init__(self, path=""):
+		# load model and processor from path
+		self.processor = AutoProcessor.from_pretrained(path)
+		self.model = MusicgenForConditionalGeneration.from_pretrained(
+			path, torch_dtype=torch.float16).to("cuda")
+		self.generator = generator('conf.yaml')
+	def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
+		"""
+		Args:
+				data (:dict:):
+						The payload with the text prompt and generation parameters.
+		"""
+		prompt_duration = 2
+		# process input
+		text = data.pop("text", data)
+		audio = data.pop("audio", data)
+		parameters = data.pop("parameters", None)
+		audio, sr = sf.read(io.BytesIO(audio))
+		output = self.generate(text, audio, sr)
+		# # pass inputs with all kwargs in data
+		# if parameters is not None:
+		# 	with torch.autocast("cuda"):
+		# 			outputs = self.model.generate(**inputs, **parameters)
+		# else:
+		# 	with torch.autocast("cuda"):
+		# 			outputs = self.model.generate(**inputs,)
+		# postprocess the prediction
+		prediction = output.squeeze().cpu().numpy().tolist()
+		return [{"generated_audio": prediction}]