mattricesound committed on
Commit
e586364
1 Parent(s): e9d627c

Add midi option

Browse files
Files changed (2) hide show
  1. app.py +73 -18
  2. setup.py +3 -1
app.py CHANGED
@@ -29,8 +29,10 @@ from demucs import pretrained
29
  from demucs.apply import apply_model
30
  from demucs.audio import convert_audio
31
  from gradio_client import Client
 
32
 
33
  LOCAL = False
 
34
 
35
 
36
  MODEL = None # Last used model
@@ -44,9 +46,15 @@ _old_call = sp.call
44
  stem2idx = {'drums': 0, 'bass': 1, 'other': 2, 'vocal': 3}
45
  stem_idx = torch.LongTensor([stem2idx['vocal'], stem2idx['other'], stem2idx['bass']])
46
 
47
- melody_files = glob.glob('clips/**/*.mp3', recursive=True)
 
 
48
 
49
 
 
 
 
 
50
  def _call_nostderr(*args, **kwargs):
51
  # Avoid ffmpeg vomitting on the logs.
52
  kwargs['stderr'] = sp.DEVNULL
@@ -183,6 +191,7 @@ def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
183
 
184
 
185
  def predict_full(text, melody, progress=gr.Progress()):
 
186
  global INTERRUPTING
187
  INTERRUPTING = False
188
  print("Running local model")
@@ -194,17 +203,45 @@ def predict_full(text, melody, progress=gr.Progress()):
194
 
195
  outs = _do_predictions(
196
  [text], [melody], duration=10, progress=True)
197
-
198
  return outs[0], gr.File.update(value=outs[0], visible=True)
199
 
200
 
201
 
202
  def select_new_melody():
203
- new_melody_file = np.random.choice(melody_files)
204
- return gr.update(source="upload", value=new_melody_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  def run_remote_model(text, melody):
207
- print("Running Audiocraft API model with text", text, "and melody", melody)
 
208
  result = client.predict(
209
  text, # str in 'Describe your music' Textbox component
210
  melody, # str (filepath or URL to file) in 'File' Audio component
@@ -223,6 +260,8 @@ def run_remote_model(text, melody):
223
  sp.run(["ffmpeg", "-i", result, "-vn", "-acodec", "pcm_s16le", "-ar", "32000", "-ac", "1", d_filename])
224
  # Load wav file
225
  output, sr = audio_read(d_filename)
 
 
226
  # Demucs
227
  print("Running demucs")
228
  wav = convert_audio(output, sr, DEMUCS_MODEL.samplerate, DEMUCS_MODEL.audio_channels)
@@ -241,9 +280,14 @@ def run_remote_model(text, melody):
241
  d_filename, demucs_output, 32000, strategy="loudness",
242
  loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
243
  file_cleaner.add(d_filename)
 
 
244
  print("Finished", text)
245
  print("Tempfiles currently stored: ", len(file_cleaner.files))
246
- return d_filename, gr.File.update(value=d_filename, visible=True)
 
 
 
247
 
248
  def ui_full(launch_kwargs):
249
  with gr.Blocks() as interface:
@@ -262,8 +306,8 @@ def ui_full(launch_kwargs):
262
  audio_type="numpy"
263
  else:
264
  audio_type="filepath"
265
- melody = gr.Audio(type=audio_type, label="File",
266
- interactive=True, elem_id="melody-input", value="clips/chipmunk.wav")
267
  new_melody = gr.Button("New Melody", interactive=True)
268
  with gr.Row():
269
  submit = gr.Button("Submit")
@@ -272,26 +316,33 @@ def ui_full(launch_kwargs):
272
 
273
  with gr.Column():
274
  output_without_drum = gr.Audio(label="Output")
275
- file_download_no_drum = gr.File(label="Download", visible=False)
276
- gr.Markdown(
277
- """
278
- Note that the files will be deleted after 10 minutes, so make sure to download!
279
- """
280
- )
 
 
 
 
281
  if LOCAL:
282
  submit.click(predict_full,
283
  inputs=[text, melody],
284
- outputs=[output_without_drum, file_download_no_drum])
285
  else:
286
- submit.click(run_remote_model, inputs=[text, melody], outputs=[output_without_drum, file_download_no_drum])
287
  new_melody.click(select_new_melody, outputs=[melody])
 
 
 
 
288
  gr.Examples(
289
  fn=predict_full,
290
  examples=[
291
  ["Enchanting Flute Trills amidst Misty String Section"],
292
  ["Gliding Mellotron Strings over Vibrant Phrases"],
293
  ["Synth Brass Melody Floating over Airy Wind Chimes"],
294
- ["Echoing Electric Guitar Licks with Ethereal Vocal Chops"],
295
  ["Rhythmic Acoustic Guitar Licks with Echoing Layers"],
296
  ["Whimsical Flute Flourishes in a Mystical Forest Glade"],
297
  ["Airy Piccolo Trills accompanied by Floating Harp Arpeggios"],
@@ -300,7 +351,7 @@ def ui_full(launch_kwargs):
300
  ["Enchanting Kalimba Melodies atop Mystical Atmosphere"],
301
  ],
302
  inputs=[text],
303
- outputs=[output_without_drum, file_download_no_drum]
304
  )
305
 
306
  interface.queue().launch(**launch_kwargs)
@@ -315,6 +366,7 @@ if __name__ == "__main__":
315
  help='IP to listen on for connections to Gradio',
316
  )
317
  parser.add_argument("--local", action="store_true", help="Run locally instead of using API")
 
318
 
319
  args = parser.parse_args()
320
 
@@ -322,6 +374,9 @@ if __name__ == "__main__":
322
  launch_kwargs['server_name'] = args.listen
323
 
324
  LOCAL = args.local
 
 
 
325
  # Load melody model
326
  load_model()
327
  if not LOCAL:
 
29
  from demucs.apply import apply_model
30
  from demucs.audio import convert_audio
31
  from gradio_client import Client
32
+ import pretty_midi
33
 
34
  LOCAL = False
35
+ USE_MIDI = True
36
 
37
 
38
  MODEL = None # Last used model
 
46
  stem2idx = {'drums': 0, 'bass': 1, 'other': 2, 'vocal': 3}
47
  stem_idx = torch.LongTensor([stem2idx['vocal'], stem2idx['other'], stem2idx['bass']])
48
 
49
# Candidate melody sources, discovered once at import time under ./clips.
# glob.glob already returns a list, so no extra list() wrapper is needed.
melody_files = glob.glob('clips/**/*.wav', recursive=True)
midi_files = glob.glob('clips/**/*.mid', recursive=True)
# (start, end) windows; crop_melody multiplies them by the sample rate,
# so they are expressed in seconds.
crops = [(0, 5), (0, 10), (0, 15)]

# Most recent selection, recorded so rating_callback can print it.
# NOTE(review): these are process-wide globals; with several concurrent
# users they may describe another session's request - confirm this is OK.
selected_melody = ""
selected_crop = None
selected_text = ""
57
+
58
  def _call_nostderr(*args, **kwargs):
59
  # Avoid ffmpeg vomitting on the logs.
60
  kwargs['stderr'] = sp.DEVNULL
 
191
 
192
 
193
  def predict_full(text, melody, progress=gr.Progress()):
194
+ global selected_text
195
  global INTERRUPTING
196
  INTERRUPTING = False
197
  print("Running local model")
 
203
 
204
  outs = _do_predictions(
205
  [text], [melody], duration=10, progress=True)
206
+ selected_text = text
207
  return outs[0], gr.File.update(value=outs[0], visible=True)
208
 
209
 
210
 
211
  def select_new_melody():
212
+ global selected_melody
213
+ with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
214
+ if not USE_MIDI:
215
+ new_melody_file = np.random.choice(melody_files)
216
+ selected_melody = new_melody_file
217
+ else:
218
+ new_melody_file = np.random.choice(midi_files)
219
+ selected_melody = new_melody_file
220
+ new_melody_file = render_midi(new_melody_file, fname=file.name)
221
+
222
+ crop_melody(new_melody_file, fname=file.name)
223
+ file_cleaner.add(file.name)
224
+ return file.name
225
+
226
def render_midi(midi_file, fname):
    """Synthesize ``midi_file`` to audio and write the result to ``fname``.

    Args:
        midi_file: Path of the MIDI file to load with ``pretty_midi``.
        fname: Destination path; written as-is (``add_suffix=False``).

    Returns:
        ``fname``, so the caller can use the rendered file as its new source.
    """
    sample_rate = 32000  # single source of truth for synthesis and writing

    # Sonify the MIDI with pretty_midi's default synthesis waveform (sine).
    pm = pretty_midi.PrettyMIDI(midi_file)
    sine_waves = pm.synthesize(fs=sample_rate)

    # torch.from_numpy keeps NumPy's dtype (float64 by default); cast to
    # float32, which is what the audio stack normally works with.
    # NOTE(review): assumes synthesize() returns a 1-D mono array - confirm.
    wav = torch.from_numpy(sine_waves).float()
    if wav.dim() == 1:
        wav = wav.unsqueeze(0)  # explicit (channels, samples) layout

    audio_write(fname, wav, sample_rate, strategy="loudness",
                loudness_headroom_db=16, loudness_compressor=True,
                add_suffix=False)
    return fname
232
+
233
def crop_melody(melody_file, fname):
    """Cut a randomly chosen window out of ``melody_file`` and save it.

    The window is one of the ``(start, end)`` pairs in the module-level
    ``crops`` list; both bounds are multiplied by the file's sample rate
    before slicing. The chosen pair is also stored in the global
    ``selected_crop``.

    Args:
        melody_file: Path of the audio file to read.
        fname: Destination path; written as-is (``add_suffix=False``).
    """
    global selected_crop

    # Draw an index rather than an element so the stored value stays one of
    # the original tuples from ``crops``.
    selected_crop = crops[np.random.choice(len(crops))]
    window_start = selected_crop[0]
    window_end = selected_crop[1]

    waveform, sample_rate = audio_read(melody_file)
    # Slice along the last axis - assumes (channels, samples); TODO confirm.
    excerpt = waveform[:, window_start * sample_rate:window_end * sample_rate]

    audio_write(
        fname,
        excerpt,
        sample_rate,
        strategy="loudness",
        loudness_headroom_db=16,
        loudness_compressor=True,
        add_suffix=False,
    )
241
 
242
  def run_remote_model(text, melody):
243
+ global selected_text
244
+ print("Running Audiocraft API model with text", text, "and melody", melody.split("/")[-1])
245
  result = client.predict(
246
  text, # str in 'Describe your music' Textbox component
247
  melody, # str (filepath or URL to file) in 'File' Audio component
 
260
  sp.run(["ffmpeg", "-i", result, "-vn", "-acodec", "pcm_s16le", "-ar", "32000", "-ac", "1", d_filename])
261
  # Load wav file
262
  output, sr = audio_read(d_filename)
263
+ # Crop to 10 seconds
264
+ output = output[:, :10*sr]
265
  # Demucs
266
  print("Running demucs")
267
  wav = convert_audio(output, sr, DEMUCS_MODEL.samplerate, DEMUCS_MODEL.audio_channels)
 
280
  d_filename, demucs_output, 32000, strategy="loudness",
281
  loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
282
  file_cleaner.add(d_filename)
283
+ selected_text = text
284
+
285
  print("Finished", text)
286
  print("Tempfiles currently stored: ", len(file_cleaner.files))
287
+ return d_filename#, gr.File.update(value=d_filename, visible=True)
288
+
289
def rating_callback(rating):
    """Print a submitted rating next to the last recorded selection.

    Reads the module-level ``selected_text``, ``selected_melody`` and
    ``selected_crop`` values and prints them together with ``rating`` on a
    single line. Nothing is returned.

    Args:
        rating: Value passed in from the rating slider.
    """
    # NOTE(review): the selection lives in process-wide globals, so with
    # concurrent users it may belong to a different session - confirm.
    record = (
        "TEXT:", selected_text,
        "MELODY:", selected_melody,
        "CROP:", selected_crop,
        "RATING:", rating,
    )
    print(*record)
291
 
292
  def ui_full(launch_kwargs):
293
  with gr.Blocks() as interface:
 
306
  audio_type="numpy"
307
  else:
308
  audio_type="filepath"
309
+ melody = gr.Audio(type=audio_type, label="File", source="upload",
310
+ interactive=True, elem_id="melody-input", value=select_new_melody())
311
  new_melody = gr.Button("New Melody", interactive=True)
312
  with gr.Row():
313
  submit = gr.Button("Submit")
 
316
 
317
  with gr.Column():
318
  output_without_drum = gr.Audio(label="Output")
319
+ with gr.Row():
320
+ slider = gr.Slider(label="Rating", minimum=0, maximum=10, step=1, value=0, scale=2)
321
+ submit_button = gr.Button("Submit Rating", scale=1)
322
+
323
+ # file_download_no_drum = gr.File(label="Download", visible=False)
324
+ # gr.Markdown(
325
+ # """
326
+ # Note that the files will be deleted after 10 minutes, so make sure to download!
327
+ # """
328
+ # )
329
  if LOCAL:
330
  submit.click(predict_full,
331
  inputs=[text, melody],
332
+ outputs=[output_without_drum])#, file_download_no_drum])
333
  else:
334
+ submit.click(run_remote_model, inputs=[text, melody], outputs=[output_without_drum])#, file_download_no_drum])
335
  new_melody.click(select_new_melody, outputs=[melody])
336
+
337
+ # Button callbacks
338
+ submit_button.click(rating_callback, inputs=[slider])
339
+
340
  gr.Examples(
341
  fn=predict_full,
342
  examples=[
343
  ["Enchanting Flute Trills amidst Misty String Section"],
344
  ["Gliding Mellotron Strings over Vibrant Phrases"],
345
  ["Synth Brass Melody Floating over Airy Wind Chimes"],
 
346
  ["Rhythmic Acoustic Guitar Licks with Echoing Layers"],
347
  ["Whimsical Flute Flourishes in a Mystical Forest Glade"],
348
  ["Airy Piccolo Trills accompanied by Floating Harp Arpeggios"],
 
351
  ["Enchanting Kalimba Melodies atop Mystical Atmosphere"],
352
  ],
353
  inputs=[text],
354
+ outputs=[output_without_drum]#, file_download_no_drum]
355
  )
356
 
357
  interface.queue().launch(**launch_kwargs)
 
366
  help='IP to listen on for connections to Gradio',
367
  )
368
  parser.add_argument("--local", action="store_true", help="Run locally instead of using API")
369
+ parser.add_argument("--midi", action="store_true", help="Render midi instead of wav")
370
 
371
  args = parser.parse_args()
372
 
 
374
  launch_kwargs['server_name'] = args.listen
375
 
376
  LOCAL = args.local
377
+ USE_MIDI = args.midi
378
+
379
+ print("Using midi:", USE_MIDI)
380
  # Load melody model
381
  load_model()
382
  if not LOCAL:
setup.py CHANGED
@@ -35,7 +35,9 @@ setup(
35
  "flask",
36
  "flask-socketio",
37
  "audiocraft@git+https://github.com/facebookresearch/audiocraft",
38
- "gradio"
 
 
39
  ],
40
  include_package_data=True,
41
  )
 
35
  "flask",
36
  "flask-socketio",
37
  "audiocraft@git+https://github.com/facebookresearch/audiocraft",
38
+ "gradio",
39
+ "gradio_client",
40
+ "pretty_midi"
41
  ],
42
  include_package_data=True,
43
  )