Spaces:
Runtime error
Runtime error
File size: 4,537 Bytes
365b97f d3672e7 450547e 365b97f d3672e7 365b97f cb12a08 35f8d0a cb12a08 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
import gradio as gr
import numpy as np
import warnings
# from google.cloud import storage
class MusicGenHandler():
    """Load a MusicGen checkpoint and generate audio from text prompts.

    Keeps exactly one model in memory and supports hot-swapping to a
    different checkpoint via update_model().
    """

    def __init__(self, init_model_path='createsafe/grimes-stem-model', generation_duration=30.0):
        # Path (local directory or hub id) of the currently-loaded checkpoint.
        self.model_path = init_model_path
        # Length in seconds of each generated clip.
        self.generation_duration = generation_duration
        self._setup_model()

    def _setup_model(self):
        """(Re)load the model at self.model_path and apply generation params."""
        self.model = MusicGen.get_pretrained(self.model_path)
        self.model.set_generation_params(duration=self.generation_duration)

    def inference(self, prompts):
        """Turn a prompt or list of prompts into audio as a numpy array."""
        if not isinstance(prompts, list):
            # Wrap a single prompt in a list; the previous list(prompts)
            # split a string prompt into individual characters.
            prompts = [prompts]
        # Move to CPU before .numpy(): torch refuses to convert a CUDA
        # tensor directly, so this also works when the model runs on GPU.
        return self.model.generate(prompts).cpu().numpy()

    def update_model(self, new_model_path):
        """Swap to a different checkpoint; keep the old one loaded on failure."""
        if new_model_path == self.model_path:
            return
        previous_path = self.model_path
        try:
            self.model_path = new_model_path
            self._setup_model()
        except Exception as exc:  # narrowed from a bare except that hid all errors
            # Roll back so self.model_path still describes the model actually loaded.
            self.model_path = previous_path
            warnings.warn(f"could not setup model located at {new_model_path}: {exc}")
# Module-level handler shared by all requests; loads the default checkpoint at import time.
model = MusicGenHandler()
# Labels for slider positions 0.0, 0.1, ..., 1.0 (index = value * 10).
_SLIDER_LABELS = (
    "none", "minimal", "little", "not much", "just below mean", "mean",
    "just above mean", "sufficient", "ample", "great", "maximal",
)


def slider_val_to_text(val):
    """Map a slider value in [0, 1] to a human-readable intensity label.

    Uses round() so floating-point slider values such as
    0.30000000000000004 still resolve to the intended label; the original
    exact == comparisons returned None for such values, which then crashed
    the ``str + None`` concatenation in text_to_music.

    Returns None for values outside [0, 1], matching the original behavior
    for unhandled inputs.
    """
    idx = round(val * 10)
    if 0 <= idx <= 10:
        return _SLIDER_LABELS[idx]
    return None
def _to_pcm16(audio):
    """Peak-normalize a float waveform and convert it to 16-bit PCM samples.

    astype(int) in the original produced int64 samples, contradicting the
    "16 bit PCM" intent; np.int16 is the actual 16-bit sample type.
    """
    peak = np.max(np.abs(audio))
    if peak > 0.0:
        audio = audio / peak
    # NOTE(review): model.inference returns a batched array (batch, channels,
    # samples); confirm the Gradio audio output accepts this shape.
    return (audio * 32767).astype(np.int16)


def text_to_music(text, instrument, brightness, percusiveness, business, variance, temperature, bass, mids, highs, tempo, noisiness):
    """Build a prompt from the UI controls, generate audio, return (sr, pcm16).

    Each non-zero slider contributes "<name> <label>, " to the prompt;
    noisiness is last and intentionally carries no trailing separator,
    preserving the original prompt format exactly.
    """
    prompt = ""
    if text:
        prompt += text + ". "
    if instrument:
        prompt += instrument + ". "
    for name, value in (
        ('brightness', brightness),
        ('percusiveness', percusiveness),
        ('business', business),
        ('variance', variance),
        ('temperature', temperature),
        ('bass', bass),
        ('mids', mids),
        ('highs', highs),
        ('tempo', tempo),
    ):
        if value:
            prompt += name + ' ' + slider_val_to_text(value) + ', '
    if noisiness:
        prompt += 'noisiness ' + slider_val_to_text(noisiness)
    # Swap to the dedicated fine-tune for instruments that have one;
    # "keys" and "bass" fall through and reuse the currently-loaded model.
    if instrument == "all-stems":
        model.update_model(new_model_path='./cs-pretrained/stem_model')
    elif instrument == "drums":
        model.update_model(new_model_path='./cs-pretrained/drums_model')
    audio = model.inference(prompts=[prompt])
    # 32000 Hz matches MusicGen's output rate -- TODO confirm against
    # model.sample_rate rather than hard-coding.
    return (32000, _to_pcm16(audio))
def run():
    """Build and launch the Gradio text-to-music demo UI."""
    # All sliders share the same range/step; only the label differs.
    slider_labels = [
        "Brightness", "Percussiveness", "Business", "Variance",
        "Temperature", "Bass", "Mids", "Highs", "Tempo", "Noisiness",
    ]
    controls = [
        gr.Textbox(label="Text prompt"),
        gr.Dropdown(["all-stems", "drums", "keys", "bass"], label="Instrument"),
    ]
    controls.extend(gr.Slider(0, 1, step=0.1, label=name) for name in slider_labels)
    demo = gr.Interface(fn=text_to_music, inputs=controls, outputs="audio")
    demo.launch()
# def __init__(self, init_model_path='./cs-pretrained/stem_model', generation_duration=30.0):
# Script entry point: start the Gradio demo server.
if __name__ == "__main__":
    run()
|