"""Generate a short music clip from a text prompt with MusicGen.

The script loads a pretrained MusicGen checkpoint, generates one clip per
entry in ``descriptions`` and writes the first clip to ``data/output.wav``.

Set ``USE_MELODY_CONDITIONING`` to ``True`` to use the "melody" checkpoint
and condition the generation on the reference recording at ``MELODY_PATH``.
"""

import os

import soundfile as sf
import torchaudio
from audiocraft.models import musicgen

# When True, the "melody" checkpoint is loaded and generation is conditioned
# on the reference audio; when False, the text-only "small" checkpoint is used.
USE_MELODY_CONDITIONING = False
MELODY_PATH = "./asitwas_vocals.wav"
OUTPUT_PATH = "data/output.wav"

print("Loading model...")
model = musicgen.MusicGen.get_pretrained(
    "melody" if USE_MELODY_CONDITIONING else "small"
)
model.set_generation_params(duration=8)

descriptions = ["Film score epic moment"]

print("Running inference...")
if USE_MELODY_CONDITIONING:
    # Only read the reference recording when it is actually needed, so the
    # text-only path does not fail on a missing file.
    melody, sr = torchaudio.load(MELODY_PATH)
    # Add a batch axis and repeat the melody once per description.
    wav = model.generate_with_chroma(
        descriptions, melody[None].expand(len(descriptions), -1, -1), sr
    )
else:
    # One sample is generated per description.
    wav = model.generate(descriptions)

# Ask the model for its sample rate rather than hard-coding it, so the written
# file stays correct if a different checkpoint is loaded.
model_sampling_rate = model.sample_rate

# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

# The generated tensor may live on an accelerator device, and Tensor.numpy()
# only works on CPU tensors, so move it to the CPU first. The transpose puts
# the first sample into the (frames, channels) layout soundfile expects.
first_sample = wav[0].detach().cpu().numpy().T
sf.write(OUTPUT_PATH, first_sample, model_sampling_rate)