mattricesound committed on
Commit
d84fb5f
1 Parent(s): 6ab4004

Change video to audio component. Rename filepaths

Browse files
Files changed (2) hide show
  1. .gitignore +2 -1
  2. app.py +42 -82
.gitignore CHANGED
@@ -159,4 +159,5 @@ cython_debug/
159
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
  #.idea/
161
 
162
- data/
 
 
159
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
  #.idea/
161
 
162
+ data/
163
+ temp/
app.py CHANGED
@@ -31,9 +31,7 @@ from demucs.audio import convert_audio
31
 
32
  MODEL = None # Last used model
33
  DEMUCS_MODEL = None
34
- IS_BATCHED = False
35
  MAX_BATCH_SIZE = 12
36
- BATCHED_DURATION = 15
37
  INTERRUPTING = False
38
  # We have to wrap subprocess call to clean a bit the log when using gr.make_waveform
39
  _old_call = sp.call
@@ -80,8 +78,8 @@ class FileCleaner:
80
  else:
81
  break
82
 
83
-
84
- file_cleaner = FileCleaner()
85
 
86
 
87
  def make_waveform(*args, **kwargs):
@@ -149,19 +147,34 @@ def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
149
 
150
  output = output.cpu()
151
  demucs_output = demucs_output.cpu()
152
- with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
153
- audio_write(
154
- file.name, output, MODEL.sample_rate, strategy="loudness",
155
- loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
156
- out_files.append(pool.submit(make_waveform, file.name))
157
- file_cleaner.add(file.name)
158
- with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
159
- audio_write(
160
- file.name, demucs_output, MODEL.sample_rate, strategy="loudness",
161
- loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
162
- out_files.append(pool.submit(make_waveform, file.name))
163
- file_cleaner.add(file.name)
164
- res = [out_file.result() for out_file in out_files]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  for file in res:
166
  file_cleaner.add(file)
167
  print("batch finished", len(texts), time.time() - be)
@@ -169,13 +182,6 @@ def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
169
  return res
170
 
171
 
172
- def predict_batched(texts, melodies):
173
- max_text_length = 512
174
- texts = [text[:max_text_length] for text in texts]
175
- load_model('melody')
176
- res = _do_predictions(texts, melodies, BATCHED_DURATION)
177
- return [res]
178
-
179
 
180
  def predict_full(text, melody, duration, topk, topp, temperature, cfg_coef, progress=gr.Progress()):
181
  global INTERRUPTING
@@ -188,7 +194,6 @@ def predict_full(text, melody, duration, topk, topp, temperature, cfg_coef, prog
188
  raise gr.Error("Topp must be non-negative.")
189
 
190
  topk = int(topk)
191
- # load_model(model)
192
 
193
  def _progress(generated, to_generate):
194
  progress((generated, to_generate))
@@ -234,10 +239,17 @@ def ui_full(launch_kwargs):
234
  cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
235
  with gr.Column():
236
  with gr.Row():
237
- output_normal = gr.Video(label="Generated Music")
 
238
  with gr.Row():
239
- output_without_drum = gr.Video(label="Removed drums")
240
-
 
 
 
 
 
 
241
  submit.click(predict_full,
242
  inputs=[text, melody, duration, topk, topp, temperature, cfg_coef],
243
  outputs=[output_normal, output_without_drum])
@@ -262,56 +274,6 @@ def ui_full(launch_kwargs):
262
  interface.queue().launch(**launch_kwargs)
263
 
264
 
265
- def ui_batched(launch_kwargs):
266
- with gr.Blocks() as demo:
267
- gr.Markdown(
268
- """
269
- # MusicGen
270
-
271
- This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft),
272
- a simple and controllable model for music generation
273
- presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
274
- <br/>
275
- <a href="https://huggingface.co/spaces/facebook/MusicGen?duplicate=true"
276
- style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
277
- <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;"
278
- src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
279
- for longer sequences, more control and no queue.</p>
280
- """
281
- )
282
- with gr.Row():
283
- with gr.Column():
284
- with gr.Row():
285
- text = gr.Text(label="Describe your music", lines=2, interactive=True)
286
- with gr.Column():
287
- radio = gr.Radio(["file", "mic"], value="file",
288
- label="Condition on a melody (optional) File or Mic")
289
- melody = gr.Audio(source="upload", type="numpy", label="File",
290
- interactive=True, elem_id="melody-input")
291
- with gr.Row():
292
- submit = gr.Button("Generate")
293
- with gr.Column():
294
- output = gr.Video(label="Generated Music")
295
- submit.click(predict_batched, inputs=[text, melody],
296
- outputs=[output], batch=True, max_batch_size=MAX_BATCH_SIZE)
297
- radio.change(toggle_audio_src, radio, [melody], queue=False, show_progress=False)
298
- gr.Markdown("""
299
- ### More details
300
-
301
- The model will generate 12 seconds of audio based on the description you provided.
302
- You can optionaly provide a reference audio from which a broad melody will be extracted.
303
- The model will then try to follow both the description and melody provided.
304
- All samples are generated with the `melody` model.
305
-
306
- You can also use your own GPU or a Google Colab by following the instructions on our repo.
307
-
308
- See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
309
- for more details.
310
- """)
311
-
312
- demo.queue(max_size=8 * 4).launch(**launch_kwargs)
313
-
314
-
315
  if __name__ == "__main__":
316
  parser = argparse.ArgumentParser()
317
  parser.add_argument(
@@ -355,8 +317,6 @@ if __name__ == "__main__":
355
 
356
  # Load melody model
357
  load_model()
 
358
  # Show the interface
359
- if IS_BATCHED:
360
- ui_batched(launch_kwargs)
361
- else:
362
- ui_full(launch_kwargs)
 
31
 
32
  MODEL = None # Last used model
33
  DEMUCS_MODEL = None
 
34
  MAX_BATCH_SIZE = 12
 
35
  INTERRUPTING = False
36
  # We have to wrap subprocess call to clean a bit the log when using gr.make_waveform
37
  _old_call = sp.call
 
78
  else:
79
  break
80
 
81
+ # 10 minutes
82
+ file_cleaner = FileCleaner(600)
83
 
84
 
85
  def make_waveform(*args, **kwargs):
 
147
 
148
  output = output.cpu()
149
  demucs_output = demucs_output.cpu()
150
+
151
+ # Naming
152
+ filename = f"temp/{texts[0][:10]}.wav"
153
+ d_filename = f"temp/{texts[0][:10]}_demucs.wav"
154
+
155
+ # If path exists, add number. If number exists, update number.
156
+ i = 1
157
+ while Path(filename).exists():
158
+ filename = f"{texts[0][:10]}_{i}.wav"
159
+ d_filename = f"{texts[0][:10]}_{i}_demucs.wav"
160
+ i += 1
161
+
162
+ # with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
163
+ audio_write(
164
+ filename, output, MODEL.sample_rate, strategy="loudness",
165
+ loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
166
+ # out_files.append(pool.submit(make_waveform, filename))
167
+ out_files.append(filename)
168
+ file_cleaner.add(filename)
169
+ # with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
170
+ audio_write(
171
+ d_filename, demucs_output, MODEL.sample_rate, strategy="loudness",
172
+ loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
173
+ out_files.append(d_filename)
174
+ # out_files.append(pool.submit(make_waveform, d_filename))
175
+ file_cleaner.add(d_filename)
176
+ # res = [out_file.result() for out_file in out_files]
177
+ res = [out_file for out_file in out_files]
178
  for file in res:
179
  file_cleaner.add(file)
180
  print("batch finished", len(texts), time.time() - be)
 
182
  return res
183
 
184
 
 
 
 
 
 
 
 
185
 
186
  def predict_full(text, melody, duration, topk, topp, temperature, cfg_coef, progress=gr.Progress()):
187
  global INTERRUPTING
 
194
  raise gr.Error("Topp must be non-negative.")
195
 
196
  topk = int(topk)
 
197
 
198
  def _progress(generated, to_generate):
199
  progress((generated, to_generate))
 
239
  cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
240
  with gr.Column():
241
  with gr.Row():
242
+ # output_normal = gr.Video(label="Generated Music")
243
+ output_normal = gr.Audio(label="Generated Music")
244
  with gr.Row():
245
+ # output_without_drum = gr.Video(label="Removed drums")
246
+ output_without_drum = gr.Audio(label="Removed drums")
247
+ with gr.Row():
248
+ gr.Markdown(
249
+ """
250
+ Note that the files will be deleted after 10 minutes, so make sure to download!
251
+ """
252
+ )
253
  submit.click(predict_full,
254
  inputs=[text, melody, duration, topk, topp, temperature, cfg_coef],
255
  outputs=[output_normal, output_without_drum])
 
274
  interface.queue().launch(**launch_kwargs)
275
 
276
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  if __name__ == "__main__":
278
  parser = argparse.ArgumentParser()
279
  parser.add_argument(
 
317
 
318
  # Load melody model
319
  load_model()
320
+ os.mkdir("temp")
321
  # Show the interface
322
+ ui_full(launch_kwargs)