hugo flores garcia committed on
Commit
3cd737d
1 Parent(s): b90b198
Files changed (1) hide show
  1. app.py +39 -51
app.py CHANGED
@@ -73,14 +73,8 @@ def load_example_audio():
73
  return "./assets/example.wav"
74
 
75
 
76
- def _vamp(data, return_mask=False):
77
- # remove any old files in the output directory (from previous runs)
78
- shutil.rmtree(OUT_DIR)
79
- OUT_DIR.mkdir()
80
 
81
- out_dir = OUT_DIR / str(uuid.uuid4())
82
- out_dir.mkdir()
83
- sig = at.AudioSignal(data[input_audio])
84
  sig = interface.preprocess(sig)
85
 
86
  loudness = sig.loudness()
@@ -103,16 +97,10 @@ def _vamp(data, return_mask=False):
103
  mask = pmask.codebook_mask(mask, int(data[n_mask_codebooks]))
104
 
105
  print(f"sampletemp {data[sampletemp]}")
106
- print(f"top_p {data[top_p]}")
107
  print(f"num_steps {data[num_steps]}")
108
  print(f"periodic_p {data[periodic_p]}")
109
  print(f"seed {data[seed]}")
110
 
111
-
112
- _top_p = data[top_p] if data[top_p] > 0 else None
113
- # save the mask as a txt file
114
- np.savetxt(out_dir / "mask.txt", mask[:,0,:].long().cpu().numpy())
115
-
116
  _seed = data[seed] if data[seed] > 0 else None
117
  print(f"processing coarse...")
118
  zv, mask_z = interface.coarse_vamp(
@@ -122,41 +110,44 @@ def _vamp(data, return_mask=False):
122
  mask_temperature=1.5*10,
123
  sampling_temperature=data[sampletemp],
124
  return_mask=True,
125
- top_p=_top_p,
126
  gen_fn=interface.coarse.generate,
127
  seed=_seed,
128
  sample_cutoff=1.0,
129
  )
130
 
131
- if use_coarse2fine:
132
- print(f"processing coarse to fine...")
133
- zv = interface.coarse_to_fine(
134
- zv,
135
- mask_temperature=1.5*10,
136
- sampling_temperature=data[sampletemp],
137
- mask=mask,
138
- sampling_steps=data[num_steps] // 2,
139
- sample_cutoff=1.0,
140
- seed=_seed,
141
- )
142
 
143
  sig = interface.to_signal(zv).cpu()
144
  print("done")
 
145
 
146
- sig.write(out_dir / "output.wav")
147
 
148
- if return_mask:
149
- mask = interface.to_signal(mask_z).cpu()
150
- mask.write(out_dir / "mask.wav")
151
- return sig.path_to_file, mask.path_to_file
152
- else:
153
- return sig.path_to_file
 
 
 
 
 
154
 
155
- def vamp(data):
156
- return _vamp(data, return_mask=True)
157
 
158
- def api_vamp(data):
159
- return _vamp(data, return_mask=False)
 
160
 
161
 
162
  with gr.Blocks() as demo:
@@ -228,16 +219,16 @@ with gr.Blocks() as demo:
228
  label="number of steps (should normally be between 12 and 36)",
229
  minimum=1,
230
  maximum=128,
231
- step=1,
232
- value=36
233
  )
234
 
235
- dropout = gr.Slider(
236
- label="mask dropout",
237
- minimum=0.0,
238
- maximum=1.0,
239
- step=0.01,
240
- value=0.0
241
  )
242
 
243
 
@@ -247,10 +238,8 @@ with gr.Blocks() as demo:
247
  precision=0,
248
  )
249
 
250
- # mask settings
251
- with gr.Column():
252
 
253
- vamp_button = gr.Button("generate (vamp)!!!")
254
  output_audio = gr.Audio(
255
  label="output audio",
256
  interactive=False,
@@ -262,23 +251,22 @@ with gr.Blocks() as demo:
262
  num_steps,
263
  sampletemp,
264
  periodic_p,
265
- dropout,
266
  seed,
267
  n_mask_codebooks,
268
  }
269
 
270
  # connect widgets
271
  vamp_button.click(
272
- fn=vamp,
273
  inputs=_inputs,
274
  outputs=[output_audio],
275
  )
276
 
277
 
278
  build_endpoint(
279
- inputs=harp_inputs,
280
  output=output_audio,
281
- process_fn=harp_vamp,
282
  card=ModelCard(
283
  name="vampnet",
284
  description="turn your music into NES music!! quick!! NOTE: vampnet's has a maximum context length of 10 seconds. Please split all audio clips into 10 second chunks, or processing will result in an error. ",
 
73
  return "./assets/example.wav"
74
 
75
 
76
+ def _vamp(sig, data):
 
 
 
77
 
 
 
 
78
  sig = interface.preprocess(sig)
79
 
80
  loudness = sig.loudness()
 
97
  mask = pmask.codebook_mask(mask, int(data[n_mask_codebooks]))
98
 
99
  print(f"sampletemp {data[sampletemp]}")
 
100
  print(f"num_steps {data[num_steps]}")
101
  print(f"periodic_p {data[periodic_p]}")
102
  print(f"seed {data[seed]}")
103
 
 
 
 
 
 
104
  _seed = data[seed] if data[seed] > 0 else None
105
  print(f"processing coarse...")
106
  zv, mask_z = interface.coarse_vamp(
 
110
  mask_temperature=1.5*10,
111
  sampling_temperature=data[sampletemp],
112
  return_mask=True,
113
+ top_p=0.85,
114
  gen_fn=interface.coarse.generate,
115
  seed=_seed,
116
  sample_cutoff=1.0,
117
  )
118
 
119
+ print(f"processing coarse to fine...")
120
+ zv = interface.coarse_to_fine(
121
+ zv,
122
+ mask_temperature=1.5*10,
123
+ sampling_temperature=data[sampletemp],
124
+ mask=mask,
125
+ sampling_steps=data[num_steps] // 2,
126
+ sample_cutoff=1.0,
127
+ seed=_seed,
128
+ )
 
129
 
130
  sig = interface.to_signal(zv).cpu()
131
  print("done")
132
+ return sig
133
 
 
134
 
135
def process_fn(data):
    """Run the input audio through `_vamp` repeatedly and save the result.

    Args:
        data: mapping keyed by the Gradio input components; this function
            reads `data[input_audio]` (handed to `at.AudioSignal`) and
            `data[num_passes]` (how many times to apply `_vamp`). The same
            mapping is forwarded unchanged to `_vamp` for its own settings.

    Returns:
        `sig.path_to_file` after writing `output.wav` into a fresh,
        uniquely named sub-directory of `OUT_DIR` (presumably the path
        of the file just written -- confirm against audiotools).
    """
    # remove any old files in the output directory (from previous runs);
    # tolerate a missing directory so the first call does not crash.
    # NOTE(review): this also wipes outputs of any request still in flight.
    if OUT_DIR.exists():
        shutil.rmtree(OUT_DIR)
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    out_dir = OUT_DIR / str(uuid.uuid4())
    out_dir.mkdir()
    sig = at.AudioSignal(data[input_audio])

    # Feed each pass's output into the next pass. The slider value is cast
    # to int (as done for n_mask_codebooks) because range() rejects floats.
    for _ in range(int(data[num_passes])):
        sig = _vamp(sig, data)

    sig.write(out_dir / "output.wav")

    return sig.path_to_file
151
 
152
 
153
  with gr.Blocks() as demo:
 
219
  label="number of steps (should normally be between 12 and 36)",
220
  minimum=1,
221
  maximum=128,
222
+ step=6,
223
+ value=24
224
  )
225
 
226
+ num_passes = gr.Slider(
227
+ label="number of passes (more passes = more time, but better results)",
228
+ minimum=2,
229
+ maximum=8,
230
+ step=1,
231
+ value=4
232
  )
233
 
234
 
 
238
  precision=0,
239
  )
240
 
 
 
241
 
242
+ vamp_button = gr.Button("nes, quick!!!!!")
243
  output_audio = gr.Audio(
244
  label="output audio",
245
  interactive=False,
 
251
  num_steps,
252
  sampletemp,
253
  periodic_p,
 
254
  seed,
255
  n_mask_codebooks,
256
  }
257
 
258
  # connect widgets
259
  vamp_button.click(
260
+ fn=process_fn,
261
  inputs=_inputs,
262
  outputs=[output_audio],
263
  )
264
 
265
 
266
  build_endpoint(
267
+ inputs=_inputs,
268
  output=output_audio,
269
+ process_fn=process_fn,
270
  card=ModelCard(
271
  name="vampnet",
272
  description="turn your music into NES music!! quick!! NOTE: vampnet's has a maximum context length of 10 seconds. Please split all audio clips into 10 second chunks, or processing will result in an error. ",