Hugo Flores Garcia committed on
Commit
4c6c719
1 Parent(s): fff28a2
Files changed (3) hide show
  1. README.md +0 -5
  2. demo.py → app.py +25 -57
  3. setup.py +1 -0
README.md CHANGED
@@ -41,11 +41,6 @@ Download the pretrained models from [this link](https://zenodo.org/record/813654
41
 
42
  # Usage
43
 
44
- First, you'll want to set up your environment
45
- ```bash
46
- source ./env/env.sh
47
- ```
48
-
49
  ## Launching the Gradio Interface
50
  You can launch a gradio UI to play with vampnet.
51
 
 
41
 
42
  # Usage
43
 
 
 
 
 
 
44
  ## Launching the Gradio Interface
45
  You can launch a gradio UI to play with vampnet.
46
 
demo.py → app.py RENAMED
@@ -32,15 +32,6 @@ dataset = at.data.datasets.AudioDataset(
32
  )
33
 
34
 
35
- checkpoints = {
36
- "vampnet": {
37
- "coarse": "./models/vampnet/coarse.pth",
38
- "c2f": "./models/vampnet/c2f.pth",
39
- "codec": "./models/vampnet/codec.pth",
40
- "full_ckpt": True
41
- },
42
- }
43
- interface.checkpoint_key = "vampnet"
44
 
45
 
46
  OUT_DIR = Path("gradio-outputs")
@@ -74,23 +65,10 @@ def load_random_audio():
74
 
75
 
76
  def _vamp(data, return_mask=False):
77
-
78
- # if our checkpoint key is different, we need to load a new checkpoint
79
- if data[checkpoint_key] != interface.checkpoint_key:
80
- print(f"loading checkpoint {data[checkpoint_key]}")
81
- interface.lora_load(
82
- checkpoints[data[checkpoint_key]]["coarse"],
83
- checkpoints[data[checkpoint_key]]["c2f"],
84
- checkpoints[data[checkpoint_key]]["full_ckpt"],
85
- )
86
- interface.checkpoint_key = data[checkpoint_key]
87
-
88
  out_dir = OUT_DIR / str(uuid.uuid4())
89
  out_dir.mkdir()
90
  sig = at.AudioSignal(data[input_audio])
91
 
92
- # TODO: random pitch shift of segments in the signal to prompt! window size should be a parameter, pitch shift width should be a parameter
93
-
94
  z = interface.encode(sig)
95
 
96
  ncc = data[n_conditioning_codebooks]
@@ -211,10 +189,7 @@ with gr.Blocks() as demo:
211
 
212
  with gr.Row():
213
  with gr.Column():
214
- use_coarse2fine = gr.Checkbox(
215
- label="use coarse2fine",
216
- value=True
217
- )
218
 
219
  manual_audio_upload = gr.File(
220
  label=f"upload some audio (will be randomly trimmed to max of {interface.coarse.chunk_size_s:.2f}s)",
@@ -250,38 +225,17 @@ with gr.Blocks() as demo:
250
  # mask settings
251
  with gr.Column():
252
 
253
- input_pitch_shift = gr.Slider(
254
- label="input pitch shift (semitones)",
255
- minimum=-36,
256
- maximum=36,
257
- step=1,
258
- value=0,
259
- )
260
-
261
- rand_mask_intensity = gr.Slider(
262
- label="random mask intensity. (If this is less than 1, scatters prompts throughout the audio, should be between 0.9 and 1.0)",
263
- minimum=0.0,
264
- maximum=1.0,
265
- value=1.0
266
- )
267
-
268
  periodic_p = gr.Slider(
269
- label="periodic prompt (0.0 means no hint, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
270
  minimum=0,
271
  maximum=128,
272
  step=1,
273
  value=3,
274
  )
275
- periodic_w = gr.Slider(
276
- label="periodic prompt width (steps, 1 step ~= 10milliseconds)",
277
- minimum=1,
278
- maximum=20,
279
- step=1,
280
- value=1,
281
- )
282
 
283
  onset_mask_width = gr.Slider(
284
- label="onset mask width (steps, 1 step ~= 10milliseconds)",
285
  minimum=0,
286
  maximum=20,
287
  step=1,
@@ -301,6 +255,20 @@ with gr.Blocks() as demo:
301
 
302
 
303
  with gr.Accordion("extras ", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  n_conditioning_codebooks = gr.Number(
305
  label="number of conditioning codebooks. probably 0",
306
  value=0,
@@ -337,6 +305,8 @@ with gr.Blocks() as demo:
337
  value=0.8
338
  )
339
 
 
 
340
  with gr.Accordion("sampling settings", open=False):
341
  typical_filtering = gr.Checkbox(
342
  label="typical filtering ",
@@ -356,6 +326,11 @@ with gr.Blocks() as demo:
356
  value=64
357
  )
358
 
 
 
 
 
 
359
  num_steps = gr.Slider(
360
  label="number of steps (should normally be between 12 and 36)",
361
  minimum=1,
@@ -375,11 +350,6 @@ with gr.Blocks() as demo:
375
 
376
  # mask settings
377
  with gr.Column():
378
- checkpoint_key = gr.Radio(
379
- label="checkpoint",
380
- choices=list(checkpoints.keys()),
381
- value="spotdl"
382
- )
383
  vamp_button = gr.Button("vamp!!!")
384
  output_audio = gr.Audio(
385
  label="output audio",
@@ -414,11 +384,9 @@ with gr.Blocks() as demo:
414
  use_coarse2fine,
415
  stretch_factor,
416
  onset_mask_width,
417
- input_pitch_shift,
418
  typical_filtering,
419
  typical_mass,
420
  typical_min_tokens,
421
- checkpoint_key,
422
  beat_mask_width,
423
  beat_mask_downbeats
424
  }
 
32
  )
33
 
34
 
 
 
 
 
 
 
 
 
 
35
 
36
 
37
  OUT_DIR = Path("gradio-outputs")
 
65
 
66
 
67
  def _vamp(data, return_mask=False):
 
 
 
 
 
 
 
 
 
 
 
68
  out_dir = OUT_DIR / str(uuid.uuid4())
69
  out_dir.mkdir()
70
  sig = at.AudioSignal(data[input_audio])
71
 
 
 
72
  z = interface.encode(sig)
73
 
74
  ncc = data[n_conditioning_codebooks]
 
189
 
190
  with gr.Row():
191
  with gr.Column():
192
+
 
 
 
193
 
194
  manual_audio_upload = gr.File(
195
  label=f"upload some audio (will be randomly trimmed to max of {interface.coarse.chunk_size_s:.2f}s)",
 
225
  # mask settings
226
  with gr.Column():
227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  periodic_p = gr.Slider(
229
+ label="periodic prompt (0.0 means no prompt, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
230
  minimum=0,
231
  maximum=128,
232
  step=1,
233
  value=3,
234
  )
235
+
 
 
 
 
 
 
236
 
237
  onset_mask_width = gr.Slider(
238
+ label="onset mask width (multiplies with the periodic mask, 1 step ~= 10milliseconds) ",
239
  minimum=0,
240
  maximum=20,
241
  step=1,
 
255
 
256
 
257
  with gr.Accordion("extras ", open=False):
258
+ rand_mask_intensity = gr.Slider(
259
+ label="random mask intensity. (If this is less than 1, scatters prompts throughout the audio, should be between 0.9 and 1.0)",
260
+ minimum=0.0,
261
+ maximum=1.0,
262
+ value=1.0
263
+ )
264
+
265
+ periodic_w = gr.Slider(
266
+ label="periodic prompt width (steps, 1 step ~= 10milliseconds)",
267
+ minimum=1,
268
+ maximum=20,
269
+ step=1,
270
+ value=1,
271
+ )
272
  n_conditioning_codebooks = gr.Number(
273
  label="number of conditioning codebooks. probably 0",
274
  value=0,
 
305
  value=0.8
306
  )
307
 
308
+
309
+
310
  with gr.Accordion("sampling settings", open=False):
311
  typical_filtering = gr.Checkbox(
312
  label="typical filtering ",
 
326
  value=64
327
  )
328
 
329
+ use_coarse2fine = gr.Checkbox(
330
+ label="use coarse2fine",
331
+ value=True
332
+ )
333
+
334
  num_steps = gr.Slider(
335
  label="number of steps (should normally be between 12 and 36)",
336
  minimum=1,
 
350
 
351
  # mask settings
352
  with gr.Column():
 
 
 
 
 
353
  vamp_button = gr.Button("vamp!!!")
354
  output_audio = gr.Audio(
355
  label="output audio",
 
384
  use_coarse2fine,
385
  stretch_factor,
386
  onset_mask_width,
 
387
  typical_filtering,
388
  typical_mass,
389
  typical_min_tokens,
 
390
  beat_mask_width,
391
  beat_mask_downbeats
392
  }
setup.py CHANGED
@@ -28,6 +28,7 @@ setup(
28
  install_requires=[
29
  "torch",
30
  "argbind>=0.3.2",
 
31
  # "audiotools @ git+https://github.com/descriptinc/audiotools.git@f35914b5b3c6f1bf589cd09481478d741538828e",
32
  # "dac @ git+https://github.com/descriptinc/descript-audio-codec.git",
33
  "gradio",
 
28
  install_requires=[
29
  "torch",
30
  "argbind>=0.3.2",
31
+ "numpy==1.22",
32
  # "audiotools @ git+https://github.com/descriptinc/audiotools.git@f35914b5b3c6f1bf589cd09481478d741538828e",
33
  # "dac @ git+https://github.com/descriptinc/descript-audio-codec.git",
34
  "gradio",