fffiloni committed on
Commit
d94e8fe
1 Parent(s): c7e6202

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -40
app.py CHANGED
@@ -16,13 +16,16 @@ from audiocraft.data.audio import audio_write
16
 
17
  MODEL = None
18
 
 
 
19
 
20
  def load_model(version):
21
  print("Loading model", version)
22
  return MusicGen.get_pretrained(version)
23
 
24
 
25
- def predict(model, text, melody, duration, topk, topp, temperature, cfg_coef):
 
26
  global MODEL
27
  topk = int(topk)
28
  if MODEL is None or MODEL.name != model:
@@ -57,8 +60,8 @@ def predict(model, text, melody, duration, topk, topp, temperature, cfg_coef):
57
  output = output.detach().cpu().float()[0]
58
  with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
59
  audio_write(file.name, output, MODEL.sample_rate, strategy="loudness", add_suffix=False)
60
- waveform_video = gr.make_waveform(file.name)
61
- return waveform_video
62
 
63
 
64
  with gr.Blocks() as demo:
@@ -77,7 +80,7 @@ with gr.Blocks() as demo:
77
  with gr.Row():
78
  with gr.Column():
79
  with gr.Row():
80
- text = gr.Text(label="Input Text", interactive=True)
81
  melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
82
  with gr.Row():
83
  submit = gr.Button("Submit")
@@ -90,46 +93,15 @@ with gr.Blocks() as demo:
90
  topp = gr.Number(label="Top-p", value=0, interactive=True)
91
  temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
92
  cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
93
- with gr.Column():
94
- output = gr.Video(label="Generated Music")
95
- submit.click(predict, inputs=[model, text, melody, duration, topk, topp, temperature, cfg_coef], outputs=[output])
96
- gr.Examples(
97
- fn=predict,
98
- examples=[
99
- [
100
- "An 80s driving pop song with heavy drums and synth pads in the background",
101
- "./assets/bach.mp3",
102
- "melody"
103
- ],
104
- [
105
- "A cheerful country song with acoustic guitars",
106
- "./assets/bolero_ravel.mp3",
107
- "melody"
108
- ],
109
- [
110
- "90s rock song with electric guitar and heavy drums",
111
- None,
112
- "medium"
113
- ],
114
- [
115
- "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
116
- "./assets/bach.mp3",
117
- "melody"
118
- ],
119
- [
120
- "lofi slow bpm electro chill with organic samples",
121
- None,
122
- "medium",
123
- ],
124
- ],
125
- inputs=[text, melody, model],
126
- outputs=[output]
127
- )
128
  gr.Markdown(
129
  """
130
  ### More details
131
 
132
- The model will generate a short music extract based on the description you provided.
133
  You can generate up to 30 seconds of audio.
134
 
135
  We present 4 model variations:
 
16
 
17
  MODEL = None
18
 
19
+ img_to_text = gr.Blocks.load(name="spaces/fffiloni/CLIP-Interrogator-2")
20
+
21
 
22
  def load_model(version):
23
  print("Loading model", version)
24
  return MusicGen.get_pretrained(version)
25
 
26
 
27
+ def predict(model, uploaded_image, melody, duration, topk, topp, temperature, cfg_coef):
28
+ text = img_to_text(uploaded_image, 'best', 4, fn_index=1)[0]
29
  global MODEL
30
  topk = int(topk)
31
  if MODEL is None or MODEL.name != model:
 
60
  output = output.detach().cpu().float()[0]
61
  with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
62
  audio_write(file.name, output, MODEL.sample_rate, strategy="loudness", add_suffix=False)
63
+ #waveform_video = gr.make_waveform(file.name)
64
+ return file.name
65
 
66
 
67
  with gr.Blocks() as demo:
 
80
  with gr.Row():
81
  with gr.Column():
82
  with gr.Row():
83
+ uploaded_image = gr.Image(label="Input Image", interactive=True, source="upload", type="filepath")
84
  melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
85
  with gr.Row():
86
  submit = gr.Button("Submit")
 
93
  topp = gr.Number(label="Top-p", value=0, interactive=True)
94
  temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
95
  cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
96
+ with gr.Column():
97
+ output = gr.Audio(label="Generated Music")
98
+ submit.click(predict, inputs=[model, uploaded_image, melody, duration, topk, topp, temperature, cfg_coef], outputs=[output])
99
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  gr.Markdown(
101
  """
102
  ### More details
103
 
104
+ The model will generate a short music extract based on the image you provided.
105
  You can generate up to 30 seconds of audio.
106
 
107
  We present 4 model variations: