Hugo Flores Garcia committed on
Commit
128981d
1 Parent(s): 03f09ee
Files changed (2) hide show
  1. demo.py +31 -17
  2. vampnet/interface.py +15 -0
demo.py CHANGED
@@ -65,13 +65,21 @@ def vamp(
65
  mask_periodic_amt, beat_unmask_dur,
66
  mask_dwn_chk, dwn_factor,
67
  mask_up_chk, up_factor,
68
- num_vamps, mode, use_beats, num_steps
69
  ):
70
  # try:
71
  print(input_audio)
72
 
73
- sig = at.AudioSignal(input_audio.name)
74
-
 
 
 
 
 
 
 
 
75
  if beat_unmask_dur > 0.0 and use_beats:
76
  beat_mask = interface.make_beat_mask(
77
  sig,
@@ -142,13 +150,13 @@ def save_vamp(
142
  mask_periodic_amt, beat_unmask_dur,
143
  mask_dwn_chk, dwn_factor,
144
  mask_up_chk, up_factor,
145
- num_vamps, mode, output_audio, notes, use_beats, num_steps
146
  ):
147
  out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
148
  out_dir.mkdir(parents=True, exist_ok=True)
149
 
150
- sig_in = at.AudioSignal(input_audio.name)
151
- sig_out = at.AudioSignal(output_audio.name)
152
 
153
  sig_in.write(out_dir / "input.wav")
154
  sig_out.write(out_dir / "output.wav")
@@ -168,6 +176,7 @@ def save_vamp(
168
  "up_factor": up_factor,
169
  "num_vamps": num_vamps,
170
  "num_steps": num_steps,
 
171
  "mode": mode,
172
  "notes": notes,
173
  }
@@ -212,12 +221,12 @@ with gr.Blocks() as demo:
212
  with gr.Column():
213
  gr.Markdown("""
214
  ### Tips
215
- - use the beat sync button so the output audio has the same beat structure as the input audio
216
  - if you want the generated audio to sound like the original, but with a different beat structure:
217
- - uncheck the beat sync button
218
  - decrease the periodic unmasking to anywhere from 2 to 8
219
  - if you want a more "random" generation:
220
- - uncheck the beat sync button (or reduce the beat unmask duration)
221
  - increase the periodic unmasking to 16 or more
222
  - increase the temperatures!
223
 
@@ -228,11 +237,11 @@ with gr.Blocks() as demo:
228
  with gr.Column():
229
  mode = gr.Radio(
230
  label="**mode**. note that loop mode requires a prefix and suffix longer than 0",
231
- choices=["standard", "loop"],
232
  value="standard"
233
  )
234
  num_vamps = gr.Number(
235
- label="number of vamps (or loops). more vamps = longer generated audio",
236
  value=1,
237
  precision=0
238
  )
@@ -246,13 +255,13 @@ with gr.Blocks() as demo:
246
  input_audio = gr.Audio(
247
  label="input audio",
248
  interactive=False,
249
- type="file",
250
  )
251
 
252
  audio_mask = gr.Audio(
253
  label="audio mask (listen to this to hear the mask hints)",
254
  interactive=False,
255
- type="file",
256
  )
257
 
258
  # connect widgets
@@ -273,7 +282,7 @@ with gr.Blocks() as demo:
273
  with gr.Column():
274
 
275
  mask_periodic_amt = gr.Slider(
276
- label="periodic hint (0.0 means no hint, 2 means one hint every 2 timesteps, etc, 4 means one hint every 4 timesteps, etc)",
277
  minimum=0,
278
  maximum=64,
279
  step=1,
@@ -321,6 +330,11 @@ with gr.Blocks() as demo:
321
  value=True
322
  )
323
 
 
 
 
 
 
324
  num_steps = gr.Slider(
325
  label="number of steps (should normally be between 12 and 36)",
326
  minimum=4,
@@ -334,7 +348,7 @@ with gr.Blocks() as demo:
334
  output_audio = gr.Audio(
335
  label="output audio",
336
  interactive=False,
337
- type="file"
338
  )
339
 
340
 
@@ -407,7 +421,7 @@ with gr.Blocks() as demo:
407
  mask_periodic_amt, beat_unmask_dur,
408
  mask_dwn_chk, dwn_factor,
409
  mask_up_chk, up_factor,
410
- num_vamps, mode, use_beats, num_steps
411
  ],
412
  outputs=[output_audio, audio_mask]
413
  )
@@ -422,7 +436,7 @@ with gr.Blocks() as demo:
422
  mask_up_chk, up_factor,
423
  num_vamps, mode,
424
  output_audio,
425
- notes_text, use_beats, num_steps
426
  ],
427
  outputs=[thank_you, download_file]
428
  )
 
65
  mask_periodic_amt, beat_unmask_dur,
66
  mask_dwn_chk, dwn_factor,
67
  mask_up_chk, up_factor,
68
+ num_vamps, mode, use_beats, num_steps, snap_to_beats
69
  ):
70
  # try:
71
  print(input_audio)
72
 
73
+ sig = at.AudioSignal(input_audio)
74
+
75
+ if snap_to_beats:
76
+ old_sig = sig.clone()
77
+ sig = interface.snap_to_beats(sig)
78
+ if sig.duration < (sig.duration / 4): # we cut off too much
79
+ sig = old_sig
80
+ print(f"new sig duration is {sig.duration} which is too short, reverting to old sig")
81
+ print(f"new sig duration is {sig.duration}")
82
+
83
  if beat_unmask_dur > 0.0 and use_beats:
84
  beat_mask = interface.make_beat_mask(
85
  sig,
 
150
  mask_periodic_amt, beat_unmask_dur,
151
  mask_dwn_chk, dwn_factor,
152
  mask_up_chk, up_factor,
153
+ num_vamps, mode, output_audio, notes, use_beats, num_steps, snap_to_beats
154
  ):
155
  out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
156
  out_dir.mkdir(parents=True, exist_ok=True)
157
 
158
+ sig_in = at.AudioSignal(input_audio)
159
+ sig_out = at.AudioSignal(output_audio)
160
 
161
  sig_in.write(out_dir / "input.wav")
162
  sig_out.write(out_dir / "output.wav")
 
176
  "up_factor": up_factor,
177
  "num_vamps": num_vamps,
178
  "num_steps": num_steps,
179
+ "snap_to_beats": snap_to_beats,
180
  "mode": mode,
181
  "notes": notes,
182
  }
 
221
  with gr.Column():
222
  gr.Markdown("""
223
  ### Tips
224
+ - use the beat hint button so the output audio has the same beat structure as the input audio
225
  - if you want the generated audio to sound like the original, but with a different beat structure:
226
+ - uncheck the beat hint button
227
  - decrease the periodic unmasking to anywhere from 2 to 8
228
  - if you want a more "random" generation:
229
+ - uncheck the beat hint button (or reduce the beat unmask duration)
230
  - increase the periodic unmasking to 16 or more
231
  - increase the temperatures!
232
 
 
237
  with gr.Column():
238
  mode = gr.Radio(
239
  label="**mode**. note that loop mode requires a prefix and suffix longer than 0",
240
+ choices=["standard",],
241
  value="standard"
242
  )
243
  num_vamps = gr.Number(
244
+ label="number of vamps. more vamps = longer generated audio",
245
  value=1,
246
  precision=0
247
  )
 
255
  input_audio = gr.Audio(
256
  label="input audio",
257
  interactive=False,
258
+ type="filepath",
259
  )
260
 
261
  audio_mask = gr.Audio(
262
  label="audio mask (listen to this to hear the mask hints)",
263
  interactive=False,
264
+ type="filepath",
265
  )
266
 
267
  # connect widgets
 
282
  with gr.Column():
283
 
284
  mask_periodic_amt = gr.Slider(
285
+ label="periodic hint (0.0 means no hint, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
286
  minimum=0,
287
  maximum=64,
288
  step=1,
 
330
  value=True
331
  )
332
 
333
+ snap_to_beats = gr.Checkbox(
334
+ label="trim to beat markers (uncheck if the output audio is too short.)",
335
+ value=True
336
+ )
337
+
338
  num_steps = gr.Slider(
339
  label="number of steps (should normally be between 12 and 36)",
340
  minimum=4,
 
348
  output_audio = gr.Audio(
349
  label="output audio",
350
  interactive=False,
351
+ type="filepath"
352
  )
353
 
354
 
 
421
  mask_periodic_amt, beat_unmask_dur,
422
  mask_dwn_chk, dwn_factor,
423
  mask_up_chk, up_factor,
424
+ num_vamps, mode, use_beats, num_steps, snap_to_beats
425
  ],
426
  outputs=[output_audio, audio_mask]
427
  )
 
436
  mask_up_chk, up_factor,
437
  num_vamps, mode,
438
  output_audio,
439
+ notes_text, use_beats, num_steps, snap_to_beats
440
  ],
441
  outputs=[thank_you, download_file]
442
  )
vampnet/interface.py CHANGED
@@ -111,6 +111,21 @@ class Interface:
111
  z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
112
  return z
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  def make_beat_mask(self,
115
  signal: AudioSignal,
116
  before_beat_s: float = 0.1,
 
111
  z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
112
  return z
113
 
114
def snap_to_beats(
    self,
    signal: "AudioSignal"
) -> "AudioSignal":
    """Return a copy of ``signal`` trimmed to span its first to last beat.

    Beat times come from ``self.beat_tracker.extract_beats(signal)`` and are
    converted to sample offsets with ``signal.sample_rate`` (so they are
    presumably expressed in seconds -- confirm against the beat tracker).
    The input signal is never modified; all trimming happens on a clone.

    Args:
        signal: the audio to trim.

    Returns:
        A clone of ``signal`` with everything before the first beat and
        after the last beat removed. When fewer than two beats are found,
        there is no span to keep, so an untrimmed clone is returned instead
        of raising ``IndexError`` or producing an empty signal.

    Raises:
        AssertionError: if no beat tracker has been loaded on this object.
    """
    assert hasattr(self, "beat_tracker"), "No beat tracker loaded"
    # Downbeats are returned by the tracker as well but are not needed here.
    beats, _ = self.beat_tracker.extract_beats(signal)

    trimmed = signal.clone()

    # With zero beats, indexing would fail; with one beat, the first and
    # last beat coincide and the trimmed signal would be empty.
    if len(beats) < 2:
        return trimmed

    samples_begin = int(beats[0] * signal.sample_rate)
    samples_end = int(beats[-1] * signal.sample_rate)

    # Number of samples to drop from the tail. Clamp at zero so a beat time
    # reported past the end of the audio cannot yield a negative count.
    samples_after = max(signal.length - samples_end, 0)

    return trimmed.trim(samples_begin, samples_after)
128
+
129
  def make_beat_mask(self,
130
  signal: AudioSignal,
131
  before_beat_s: float = 0.1,