nakas committed on
Commit
0d37116
1 Parent(s): 28c85d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -172
app.py CHANGED
@@ -1,18 +1,9 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
-
4
- # This source code is licensed under the license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- # Updated to account for UI changes from https://github.com/rkfg/audiocraft/blob/long/app.py
8
- # also released under the MIT license.
9
-
10
  import argparse
11
  from concurrent.futures import ProcessPoolExecutor
12
- import os
13
- from pathlib import Path
14
- import subprocess as sp
15
- from tempfile import NamedTemporaryFile
16
  import time
17
  import typing as tp
18
  import warnings
@@ -30,22 +21,6 @@ IS_BATCHED = "facebook/MusicGen" in os.environ.get('SPACE_ID', '')
30
  MAX_BATCH_SIZE = 6
31
  BATCHED_DURATION = 15
32
  INTERRUPTING = False
33
- # We have to wrap subprocess call to clean a bit the log when using gr.make_waveform
34
- _old_call = sp.call
35
-
36
-
37
- def _call_nostderr(*args, **kwargs):
38
- # Avoid ffmpeg vomitting on the logs.
39
- kwargs['stderr'] = sp.DEVNULL
40
- kwargs['stdout'] = sp.DEVNULL
41
- _old_call(*args, **kwargs)
42
-
43
-
44
- sp.call = _call_nostderr
45
- # Preallocating the pool of processes.
46
- pool = ProcessPoolExecutor(3)
47
- pool.__enter__()
48
-
49
 
50
  def interrupt():
51
  global INTERRUPTING
@@ -76,7 +51,6 @@ file_cleaner = FileCleaner()
76
 
77
 
78
  def make_waveform(*args, **kwargs):
79
- # Further remove some warnings.
80
  be = time.time()
81
  with warnings.catch_warnings():
82
  warnings.simplefilter('ignore')
@@ -181,10 +155,10 @@ def ui_full(launch_kwargs):
181
  with gr.Blocks() as interface:
182
  gr.Markdown(
183
  """
184
- # MusicGen
185
- This is your private demo for [MusicGen](https://github.com/facebookresearch/audiocraft),
186
- a simple and controllable model for music generation
187
- presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
188
  """
189
  )
190
  with gr.Row():
@@ -193,28 +167,20 @@ def ui_full(launch_kwargs):
193
  text = gr.Text(label="Input Text", interactive=True)
194
  with gr.Column():
195
  radio = gr.Radio(["file", "mic"], value="file",
196
- label="Condition on a melody (optional) File or Mic")
197
- melody = gr.Audio(source="upload", type="numpy", label="File",
198
  interactive=True, elem_id="melody-input")
199
  with gr.Row():
200
- submit = gr.Button("Submit")
201
- # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
202
- _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
203
- with gr.Row():
204
- model = gr.Radio(["melody", "medium", "small", "large"],
205
- label="Model", value="melody", interactive=True)
206
  with gr.Row():
207
- duration = gr.Slider(minimum=1, maximum=120, value=10, label="Duration", interactive=True)
208
- with gr.Row():
209
- topk = gr.Number(label="Top-k", value=250, interactive=True)
210
- topp = gr.Number(label="Top-p", value=0, interactive=True)
211
- temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
212
- cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
213
- with gr.Column():
214
- output = gr.Video(label="Generated Music")
215
  submit.click(predict_full,
216
- inputs=[model, text, melody, duration, topk, topp, temperature, cfg_coef],
217
- outputs=[output])
218
  radio.change(toggle_audio_src, radio, [melody], queue=False, show_progress=False)
219
  gr.Examples(
220
  fn=predict_full,
@@ -222,117 +188,18 @@ def ui_full(launch_kwargs):
222
  [
223
  "An 80s driving pop song with heavy drums and synth pads in the background",
224
  "./assets/bach.mp3",
225
- "melody"
226
  ],
227
  [
228
  "A cheerful country song with acoustic guitars",
229
  "./assets/bolero_ravel.mp3",
230
- "melody"
231
  ],
232
  [
233
  "90s rock song with electric guitar and heavy drums",
234
  None,
235
- "medium"
236
  ],
237
  [
238
  "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
239
  "./assets/bach.mp3",
240
- "melody"
241
- ],
242
- [
243
- "lofi slow bpm electro chill with organic samples",
244
- None,
245
- "medium",
246
- ],
247
- ],
248
- inputs=[text, melody, model],
249
- outputs=[output]
250
- )
251
- gr.Markdown(
252
- """
253
- ### More details
254
-
255
- The model will generate a short music extract based on the description you provided.
256
- The model can generate up to 30 seconds of audio in one pass. It is now possible
257
- to extend the generation by feeding back the end of the previous chunk of audio.
258
- This can take a long time, and the model might lose consistency. The model might also
259
- decide at arbitrary positions that the song ends.
260
-
261
- **WARNING:** Choosing long durations will take a long time to generate (2min might take ~10min).
262
- An overlap of 12 seconds is kept with the previously generated chunk, and 18 "new" seconds
263
- are generated each time.
264
-
265
- We present 4 model variations:
266
- 1. Melody -- a music generation model capable of generating music condition
267
- on text and melody inputs. **Note**, you can also use text only.
268
- 2. Small -- a 300M transformer decoder conditioned on text only.
269
- 3. Medium -- a 1.5B transformer decoder conditioned on text only.
270
- 4. Large -- a 3.3B transformer decoder conditioned on text only (might OOM for the longest sequences.)
271
-
272
- When using `melody`, ou can optionaly provide a reference audio from
273
- which a broad melody will be extracted. The model will then try to follow both
274
- the description and melody provided.
275
-
276
- You can also use your own GPU or a Google Colab by following the instructions on our repo.
277
- See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
278
- for more details.
279
- """
280
- )
281
-
282
- interface.queue().launch(**launch_kwargs)
283
-
284
-
285
- def ui_batched(launch_kwargs):
286
- with gr.Blocks() as demo:
287
- gr.Markdown(
288
- """
289
- # MusicGen
290
-
291
- This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft),
292
- a simple and controllable model for music generation
293
- presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
294
- <br/>
295
- <a href="https://huggingface.co/spaces/facebook/MusicGen?duplicate=true"
296
- style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
297
- <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;"
298
- src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
299
- for longer sequences, more control and no queue.</p>
300
- """
301
- )
302
- with gr.Row():
303
- with gr.Column():
304
- with gr.Row():
305
- text = gr.Text(label="Describe your music", lines=2, interactive=True)
306
- with gr.Column():
307
- radio = gr.Radio(["file", "mic"], value="file",
308
- label="Condition on a melody (optional) File or Mic")
309
- melody = gr.Audio(source="upload", type="numpy", label="File",
310
- interactive=True, elem_id="melody-input")
311
- with gr.Row():
312
- submit = gr.Button("Generate")
313
- with gr.Column():
314
- output = gr.Video(label="Generated Music")
315
- submit.click(predict_batched, inputs=[text, melody],
316
- outputs=[output], batch=True, max_batch_size=MAX_BATCH_SIZE)
317
- radio.change(toggle_audio_src, radio, [melody], queue=False, show_progress=False)
318
- gr.Examples(
319
- fn=predict_batched,
320
- examples=[
321
- [
322
- "An 80s driving pop song with heavy drums and synth pads in the background",
323
- "./assets/bach.mp3",
324
- ],
325
- [
326
- "A cheerful country song with acoustic guitars",
327
- "./assets/bolero_ravel.mp3",
328
- ],
329
- [
330
- "90s rock song with electric guitar and heavy drums",
331
- None,
332
- ],
333
- [
334
- "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions bpm: 130",
335
- "./assets/bach.mp3",
336
  ],
337
  [
338
  "lofi slow bpm electro chill with organic samples",
@@ -340,24 +207,22 @@ def ui_batched(launch_kwargs):
340
  ],
341
  ],
342
  inputs=[text, melody],
343
- outputs=[output]
344
  )
345
- gr.Markdown("""
346
- ### More details
347
-
348
- The model will generate 12 seconds of audio based on the description you provided.
349
- You can optionaly provide a reference audio from which a broad melody will be extracted.
350
- The model will then try to follow both the description and melody provided.
351
- All samples are generated with the `melody` model.
352
-
353
- You can also use your own GPU or a Google Colab by following the instructions on our repo.
354
-
355
- See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
356
- for more details.
357
- """)
358
-
359
- demo.queue(max_size=8 * 4).launch(**launch_kwargs)
360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
 
362
  if __name__ == "__main__":
363
  parser = argparse.ArgumentParser()
@@ -400,8 +265,4 @@ if __name__ == "__main__":
400
  if args.share:
401
  launch_kwargs['share'] = args.share
402
 
403
- # Show the interface
404
- if IS_BATCHED:
405
- ui_batched(launch_kwargs)
406
- else:
407
- ui_full(launch_kwargs)
 
1
+ import os
2
+ import gradio as gr
3
+ from scipy.io.wavfile import write
4
+ import subprocess
 
 
 
 
 
5
  import argparse
6
  from concurrent.futures import ProcessPoolExecutor
 
 
 
 
7
  import time
8
  import typing as tp
9
  import warnings
 
21
  MAX_BATCH_SIZE = 6
22
  BATCHED_DURATION = 15
23
  INTERRUPTING = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def interrupt():
26
  global INTERRUPTING
 
51
 
52
 
53
  def make_waveform(*args, **kwargs):
 
54
  be = time.time()
55
  with warnings.catch_warnings():
56
  warnings.simplefilter('ignore')
 
155
  with gr.Blocks() as interface:
156
  gr.Markdown(
157
  """
158
+ # MusicGen and Demucs Combination
159
+ This is a combined demo of MusicGen and Demucs.
160
+ MusicGen is a model for music generation based on text prompts,
161
+ and Demucs is a model for music source separation.
162
  """
163
  )
164
  with gr.Row():
 
167
  text = gr.Text(label="Input Text", interactive=True)
168
  with gr.Column():
169
  radio = gr.Radio(["file", "mic"], value="file",
170
+ label="Condition on a Melody (optional) File or Mic")
171
+ melody = gr.Audio(source="upload", type="numpy", label="Melody File",
172
  interactive=True, elem_id="melody-input")
173
  with gr.Row():
174
+ submit = gr.Button("Generate Music")
 
 
 
 
 
175
  with gr.Row():
176
+ audio_output = gr.Audio(type="numpy", label="Generated Music")
177
+ vocals_output = gr.Audio(type="filepath", label="Vocals")
178
+ bass_output = gr.Audio(type="filepath", label="Bass")
179
+ drums_output = gr.Audio(type="filepath", label="Drums")
180
+ other_output = gr.Audio(type="filepath", label="Other")
 
 
 
181
  submit.click(predict_full,
182
+ inputs=[text, melody, 10, 250, 0, 1.0, 3.0],
183
+ outputs=[audio_output, vocals_output, bass_output, drums_output, other_output])
184
  radio.change(toggle_audio_src, radio, [melody], queue=False, show_progress=False)
185
  gr.Examples(
186
  fn=predict_full,
 
188
  [
189
  "An 80s driving pop song with heavy drums and synth pads in the background",
190
  "./assets/bach.mp3",
 
191
  ],
192
  [
193
  "A cheerful country song with acoustic guitars",
194
  "./assets/bolero_ravel.mp3",
 
195
  ],
196
  [
197
  "90s rock song with electric guitar and heavy drums",
198
  None,
 
199
  ],
200
  [
201
  "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
202
  "./assets/bach.mp3",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  ],
204
  [
205
  "lofi slow bpm electro chill with organic samples",
 
207
  ],
208
  ],
209
  inputs=[text, melody],
210
+ outputs=[audio_output, vocals_output, bass_output, drums_output, other_output]
211
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
+ gr.Interface(
214
+ fn=inference,
215
+ inputs=gr.inputs.Audio(type="numpy", label="Input Audio"),
216
+ outputs=[
217
+ gr.outputs.Audio(type="filepath", label="Vocals"),
218
+ gr.outputs.Audio(type="filepath", label="Bass"),
219
+ gr.outputs.Audio(type="filepath", label="Drums"),
220
+ gr.outputs.Audio(type="filepath", label="Other"),
221
+ ],
222
+ title="MusicGen and Demucs Combination",
223
+ description="A combined demo of MusicGen and Demucs",
224
+ article="",
225
+ ).launch(enable_queue=True)
226
 
227
  if __name__ == "__main__":
228
  parser = argparse.ArgumentParser()
 
265
  if args.share:
266
  launch_kwargs['share'] = args.share
267
 
268
+ ui_full(launch_kwargs)