Hugo Flores Garcia committed on
Commit
a689560
1 Parent(s): 49a8e09
Files changed (1) hide show
  1. app.py +472 -129
app.py CHANGED
@@ -1,31 +1,20 @@
1
- # huggingface space exclusive
2
- import os
3
-
4
- # print("installing pyharp")
5
- # os.system('pip install "pyharp@git+https://github.com/audacitorch/pyharp.git"')
6
- # print("installing madmom")
7
- # os.system('pip install cython')
8
- # os.system('pip install madmom')
9
-
10
  from pathlib import Path
11
- from typing import Tuple
12
  import yaml
13
- import tempfile
14
  import uuid
15
- import shutil
16
- from dataclasses import dataclass, asdict
17
 
18
  import numpy as np
19
  import audiotools as at
20
  import argbind
 
21
  import torch
 
22
 
23
  import gradio as gr
24
- from vampnet.interface import Interface
25
  from vampnet import mask as pmask
26
 
27
- from pyharp import ModelCard, build_endpoint
28
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
29
 
30
  interface = Interface(
31
  device=device,
@@ -46,6 +35,16 @@ generated_confs = Path("conf/generated")
46
  for conf_file in generated_confs.glob("*/interface.yml"):
47
  with open(conf_file) as f:
48
  _conf = yaml.safe_load(f)
 
 
 
 
 
 
 
 
 
 
49
  MODEL_CHOICES[conf_file.parent.name] = _conf
50
 
51
 
@@ -53,15 +52,15 @@ for conf_file in generated_confs.glob("*/interface.yml"):
53
  OUT_DIR = Path("gradio-outputs")
54
  OUT_DIR.mkdir(exist_ok=True, parents=True)
55
 
56
-
57
  def load_audio(file):
58
  print(file)
59
  filepath = file.name
60
  sig = at.AudioSignal.salient_excerpt(
61
- filepath,
62
- duration=interface.coarse.chunk_size_s
63
  )
64
- sig = interface.preprocess(sig)
 
65
 
66
  out_dir = OUT_DIR / "tmp" / str(uuid.uuid4())
67
  out_dir.mkdir(parents=True, exist_ok=True)
@@ -72,92 +71,234 @@ def load_audio(file):
72
  def load_example_audio():
73
  return "./assets/example.wav"
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- def _vamp(sig, data):
77
-
78
- sig = interface.preprocess(sig)
 
 
79
 
80
  loudness = sig.loudness()
81
  print(f"input loudness is {loudness}")
82
 
83
- z = interface.encode(sig)
84
-
85
- # build the mask
86
- mask = pmask.full_mask(z)
87
- mask = pmask.mask_and(
88
- mask, pmask.periodic_mask(
89
- z,
90
- data[periodic_p],
91
- random_roll=True
92
- )
 
 
 
 
 
 
 
93
  )
94
 
95
- # these should be the last two mask ops
96
- mask = pmask.codebook_mask(mask, int(data[n_mask_codebooks]))
97
-
98
- print(f"sampletemp {data[sampletemp]}")
99
- print(f"num_steps {data[num_steps]}")
100
- print(f"periodic_p {data[periodic_p]}")
101
-
102
- print(f"processing coarse...")
103
- zv, mask_z = interface.coarse_vamp(
104
- z,
105
- mask=mask,
106
- sampling_steps=data[num_steps],
107
- mask_temperature=1.5*10,
108
- sampling_temperature=data[sampletemp],
109
- return_mask=True,
110
- top_p=0.85,
111
- gen_fn=interface.coarse.generate,
112
- sample_cutoff=1.0,
113
  )
114
 
115
- print(f"processing coarse to fine...")
116
- zv = interface.coarse_to_fine(
117
- zv,
118
- mask_temperature=1.5*10,
119
- sampling_temperature=data[sampletemp],
120
- mask=mask,
121
- sampling_steps=data[num_steps] // 2,
122
- sample_cutoff=1.0,
 
 
123
  )
124
 
125
- sig = interface.to_signal(zv).cpu()
126
- print("done")
127
- return sig
128
-
129
-
130
- def process_fn(data):
131
- # remove any old files in the output directory (from previous runs)
132
- shutil.rmtree(OUT_DIR)
133
- OUT_DIR.mkdir()
134
-
135
- out_dir = OUT_DIR / str(uuid.uuid4())
136
- out_dir.mkdir()
137
- sig = at.AudioSignal(data[input_audio])
138
-
139
- for _pass in range(data[num_passes]):
140
- sig = _vamp(sig, data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
- sig.write(out_dir / "output.wav")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
- return sig.path_to_file
146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  with gr.Blocks() as demo:
149
-
150
  with gr.Row():
151
  with gr.Column():
152
- gr.Markdown("# nesquik 🌰🐿️👾 ")
153
- gr.Markdown(" the ultimate bitcrusher! will do its best to convert your instrumental music into an 8-bit chiptune.")
154
-
155
- with gr.Row():
156
- with gr.Column():
157
-
158
-
159
  manual_audio_upload = gr.File(
160
- label=f"upload some audio (will be randomly trimmed to max of {interface.coarse.chunk_size_s:.2f}s)",
161
  file_types=["audio"]
162
  )
163
  load_example_audio_button = gr.Button("or load example audio")
@@ -168,6 +309,11 @@ with gr.Blocks() as demo:
168
  type="filepath",
169
  )
170
 
 
 
 
 
 
171
 
172
  # connect widgets
173
  load_example_audio_button.click(
@@ -182,85 +328,282 @@ with gr.Blocks() as demo:
182
  outputs=[ input_audio]
183
  )
184
 
 
 
185
  # mask settings
186
  with gr.Column():
187
- with gr.Accordion("controls", open=False):
188
  periodic_p = gr.Slider(
189
  label="periodic prompt",
190
- minimum=1,
191
- maximum=3,
 
 
 
 
 
 
 
192
  step=1,
193
- value=2,
 
 
 
 
 
 
 
 
194
  )
195
 
196
  n_mask_codebooks = gr.Slider(
197
- label="first upper codebook level to mask",
 
198
  minimum=0,
199
- maximum=9,
200
- value=2,
201
  step=1,
202
  )
 
 
 
 
203
 
204
- sampletemp = gr.Slider(
205
- label="sample temperature",
206
- minimum=0.8,
207
- maximum=1.5,
208
- value=1.0,
209
- step=0.001
 
210
  )
211
-
212
- num_steps = gr.Slider(
213
- label="number of steps (should normally be between 12 and 36)",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  minimum=1,
215
- maximum=36,
216
- step=6,
217
- value=24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  )
219
 
220
- num_passes = gr.Slider(
221
- label="number of passes (more passes = more time, but better results)",
222
- minimum=2,
223
- maximum=6,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  step=1,
225
- value=4
226
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
 
228
 
229
- vamp_button = gr.Button("nes, quick!!!!!")
230
- output_audio = gr.Audio(
231
- label="output audio",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  interactive=False,
233
  type="filepath"
234
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
  _inputs = {
237
  input_audio,
238
- num_steps,
239
  sampletemp,
240
- periodic_p,
 
 
 
 
 
 
 
 
 
 
 
241
  n_mask_codebooks,
242
- num_passes
 
 
 
 
 
243
  }
244
 
245
  # connect widgets
246
  vamp_button.click(
247
- fn=process_fn,
248
  inputs=_inputs,
249
- outputs=[output_audio],
 
 
 
 
 
 
 
 
250
  )
251
 
 
 
 
 
 
 
 
 
252
 
253
  build_endpoint(
254
- inputs=list(_inputs),
255
- output=output_audio,
256
- process_fn=process_fn,
257
- card=ModelCard(
258
- name="nesquik 🌰🐿️👾",
259
- description="turn your music into NES music!! quick!! NOTE: vampnet's has a maximum context length of 10 seconds. Please split all audio clips into 10 second chunks, or processing will result in an error. ",
260
- author="Hugo Flores García",
261
- tags=["music", "generative"]
262
- ),
263
- visible=False
 
264
  )
265
 
266
- demo.queue().launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from pathlib import Path
 
2
  import yaml
 
3
  import uuid
 
 
4
 
5
  import numpy as np
6
  import audiotools as at
7
  import argbind
8
+ import shutil
9
  import torch
10
+ from datetime import datetime
11
 
12
  import gradio as gr
13
+ from vampnet.interface import Interface, signal_concat
14
  from vampnet import mask as pmask
15
 
16
+ device = "cuda" if torch.cuda.is_available() else "cpu"
17
+
18
 
19
  interface = Interface(
20
  device=device,
 
35
# Config keys naming the checkpoint files a model needs, in the order they
# are checked.
_REQUIRED_CKPT_KEYS = (
    "Interface.coarse_ckpt",
    "Interface.coarse2fine_ckpt",
    "Interface.codec_ckpt",
)

for conf_file in generated_confs.glob("*/interface.yml"):
    with open(conf_file) as f:
        _conf = yaml.safe_load(f)

    # Register the model only when the coarse, coarse-to-fine and codec
    # checkpoints all exist on disk; otherwise don't add this model choice.
    # The choice is keyed by the name of the directory holding the config.
    if all(Path(_conf[key]).exists() for key in _REQUIRED_CKPT_KEYS):
        MODEL_CHOICES[conf_file.parent.name] = _conf
49
 
50
 
 
52
  OUT_DIR = Path("gradio-outputs")
53
  OUT_DIR.mkdir(exist_ok=True, parents=True)
54
 
55
+ MAX_DURATION_S = 60
56
  def load_audio(file):
57
  print(file)
58
  filepath = file.name
59
  sig = at.AudioSignal.salient_excerpt(
60
+ filepath, duration=MAX_DURATION_S
 
61
  )
62
+ # sig = interface.preprocess(sig)
63
+ sig = at.AudioSignal(filepath)
64
 
65
  out_dir = OUT_DIR / "tmp" / str(uuid.uuid4())
66
  out_dir.mkdir(parents=True, exist_ok=True)
 
71
def load_example_audio():
    """Return the relative path of the example audio clip."""
    example_path = "./assets/example.wav"
    return example_path
73
 
74
+ from torch_pitch_shift import pitch_shift, get_fast_shifts
75
def shift_pitch(signal, interval: int):
    """Pitch-shift ``signal`` in place and return it.

    Args:
        signal: object exposing ``samples`` and ``sample_rate``; its
            ``samples`` attribute is overwritten with the shifted audio.
        interval: shift amount forwarded as ``shift`` to ``pitch_shift``
            (the UI labels this value as semitones).

    Returns:
        The same ``signal`` object, now holding the shifted samples.
    """
    shifted_samples = pitch_shift(
        signal.samples,
        shift=interval,
        sample_rate=signal.sample_rate,
    )
    signal.samples = shifted_samples
    return signal
82
+
83
def _vamp(seed, input_audio, model_choice, pitch_shift_amt, periodic_p, p2, n_mask_codebooks, n_mask_codebooks_2, rand_mask_intensity, prefix_s, suffix_s, periodic_w, onset_mask_width, dropout, masktemp, sampletemp, typical_filtering, typical_mass, typical_min_tokens, top_p, sample_cutoff, win_dur, num_feedback_steps, stretch_factor, api=False):
    """Run one generation pass on an audio file and write the results to disk.

    Every run gets its own directory under ``OUT_DIR`` named after the input
    file stem, the current timestamp, the seed and the model choice. The
    input audio is always saved there as ``input.wav``.

    Args:
        seed: RNG seed; any value that is not greater than 0 selects a
            random seed instead.
        input_audio: path of the audio file to process.
        model_choice: key into ``MODEL_CHOICES`` selecting the checkpoints
            that ``interface`` is reloaded with.
        pitch_shift_amt: pitch shift applied to the input before
            generation; 0 skips the shift.
        periodic_p, p2: periodic prompt values; ``p2 == 0`` reuses
            ``periodic_p``.
        n_mask_codebooks, n_mask_codebooks_2: upper codebook mask values;
            ``n_mask_codebooks_2 == 0`` reuses ``n_mask_codebooks``.
        rand_mask_intensity, prefix_s, suffix_s, periodic_w,
        onset_mask_width, dropout: forwarded unchanged as mask-building
            options.
        masktemp: multiplied by 10 and passed on as ``mask_temperature``.
        sampletemp, typical_filtering, typical_mass, typical_min_tokens,
        sample_cutoff: forwarded unchanged as sampling options.
        top_p: forwarded as-is when greater than 0, otherwise ``None``.
        win_dur: chunk size handed to ``interface.set_chunk_size``.
        num_feedback_steps: forwarded as ``feedback_steps``.
        stretch_factor: forwarded as ``time_stretch_factor``.
        api: when true, generate a single output and return only its path.

    Returns:
        With ``api=True``: the path of the written ``out.wav``.
        Otherwise a 7-tuple: the paths of the four generated outputs, the
        path of ``mask.wav``, the path of the zip archive of the run
        directory, and the path of ``mask.png``.
    """
    # The interactive UI has four output players, so it generates a batch of
    # four; the API path only needs one result.
    n_gui_outputs = 4

    # Seeds that are not positive mean "pick one at random". The value that
    # was actually used ends up in the directory name and the metadata.
    _seed = seed if seed > 0 else int(torch.randint(0, 2**32, (1,)).item())
    at.util.seed(_seed)

    datentime = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    out_dir = OUT_DIR / f"{Path(input_audio).stem}-{datentime}-seed-{_seed}-model-{model_choice}"
    # exist_ok: the name only has one-second resolution, so a repeated
    # request with a fixed seed must not fail on an existing directory.
    out_dir.mkdir(parents=True, exist_ok=True)
    sig = at.AudioSignal(input_audio)
    sig.write(out_dir / "input.wav")

    # reload the model if necessary
    interface.reload(
        coarse_ckpt=MODEL_CHOICES[model_choice]["Interface.coarse_ckpt"],
        c2f_ckpt=MODEL_CHOICES[model_choice]["Interface.coarse2fine_ckpt"],
    )

    # Loudness is measured on the unshifted input.
    loudness = sig.loudness()
    print(f"input loudness is {loudness}")

    if pitch_shift_amt != 0:
        sig = shift_pitch(sig, pitch_shift_amt)

    # A value of 0 for either secondary control means "same as the primary".
    _p2 = periodic_p if p2 == 0 else p2
    _n_codebooks_2 = n_mask_codebooks if n_mask_codebooks_2 == 0 else n_mask_codebooks_2

    build_mask_kwargs = dict(
        rand_mask_intensity=rand_mask_intensity,
        prefix_s=prefix_s,
        suffix_s=suffix_s,
        periodic_prompt=int(periodic_p),
        periodic_prompt2=int(_p2),
        periodic_prompt_width=periodic_w,
        onset_mask_width=onset_mask_width,
        _dropout=dropout,
        upper_codebook_mask=int(n_mask_codebooks),
        upper_codebook_mask_2=int(_n_codebooks_2),
    )

    vamp_kwargs = dict(
        mask_temperature=masktemp*10,
        sampling_temperature=sampletemp,
        typical_filtering=typical_filtering,
        typical_mass=typical_mass,
        typical_min_tokens=typical_min_tokens,
        top_p=top_p if top_p > 0 else None,
        seed=_seed,
        sample_cutoff=sample_cutoff,
    )

    interface.set_chunk_size(win_dur)
    sig, mask, codes = interface.ez_vamp(
        sig,
        batch_size=n_gui_outputs if not api else 1,
        feedback_steps=num_feedback_steps,
        time_stretch_factor=stretch_factor,
        build_mask_kwargs=build_mask_kwargs,
        vamp_kwargs=vamp_kwargs,
        return_mask=True,
    )

    if api:
        sig.write(out_dir / "out.wav")
        return sig.path_to_file

    # --- interactive mode only from here on ---

    # write codes to numpy file
    np.save(out_dir / "codes.npy", codes.cpu().numpy())

    # save the metadata
    metadata = {
        "seed": _seed,
        "model_choice": model_choice,
        "mask_kwargs": build_mask_kwargs,
        "vamp_kwargs": vamp_kwargs,
        "loudness": loudness,
    }
    with open(out_dir / "metadata.yml", "w") as f:
        yaml.dump(metadata, f)

    # One file per batch item: out1.wav ... out4.wav.
    out_paths = [
        sig[i].write(out_dir / f"out{i + 1}.wav").path_to_file
        for i in range(n_gui_outputs)
    ]

    # write the mask of the first batch item to txt, each time step on a new line
    with open(out_dir / "mask.txt", "w") as f:
        m = mask[0].cpu().numpy()
        for i in range(m.shape[-1]):
            f.write(f"{m[:, i]}\n")

    import matplotlib.pyplot as plt
    plt.clf()
    interface.visualize_codes(mask)
    plt.savefig(out_dir / "mask.png")
    plt.clf()
    interface.visualize_codes(codes)
    plt.savefig(out_dir / "codes.png")
    plt.close()

    # Zip the run directory. Use the archive name reported by make_archive:
    # deriving it with Path.with_suffix() would cut the directory name at its
    # last dot (dotted file stems, float seeds, dotted model names) and point
    # at a file that does not exist.
    # NOTE(review): the archive is built before mask.wav is written below, so
    # the zip does not contain mask.wav - confirm that is intended.
    zip_path = str(shutil.make_archive(str(out_dir), 'zip', out_dir))

    # Turn the mask into audio, in groups of 1024 timesteps.
    mask_sigs = [
        interface.to_signal(mask[:, :, start:start + 1024].to(interface.device)).cpu()
        for start in range(0, mask.shape[-1], 1024)
    ]
    mask_sig = signal_concat(mask_sigs)
    mask_sig.write(out_dir / "mask.wav")

    return (
        *out_paths,
        mask_sig.path_to_file, zip_path, str(out_dir / "mask.png")
    )
201
 
202
def _collect_vamp_args(data):
    """Map a Gradio input dict onto the keyword arguments of ``_vamp``.

    ``data`` is keyed by the module-level UI components themselves (they are
    passed to the click handlers as a set of inputs), so each value is looked
    up by component object. The components are resolved when this function is
    called, not when it is defined.

    Returns:
        A dict with every ``_vamp`` argument except ``api``.
    """
    return dict(
        seed=data[seed],
        input_audio=data[input_audio],
        model_choice=data[model_choice],
        pitch_shift_amt=data[pitch_shift_amt],
        periodic_p=data[periodic_p],
        p2=data[p2],
        n_mask_codebooks=data[n_mask_codebooks],
        n_mask_codebooks_2=data[n_mask_codebooks_2],
        rand_mask_intensity=data[rand_mask_intensity],
        prefix_s=data[prefix_s],
        suffix_s=data[suffix_s],
        periodic_w=data[periodic_w],
        onset_mask_width=data[onset_mask_width],
        dropout=data[dropout],
        masktemp=data[masktemp],
        sampletemp=data[sampletemp],
        typical_filtering=data[typical_filtering],
        typical_mass=data[typical_mass],
        typical_min_tokens=data[typical_min_tokens],
        top_p=data[top_p],
        sample_cutoff=data[sample_cutoff],
        win_dur=data[win_dur],
        num_feedback_steps=data[num_feedback_steps],
        stretch_factor=data[stretch_factor],
    )


def vamp(data):
    """Click handler for the interactive UI: run ``_vamp`` with ``api=False``.

    Args:
        data: dict mapping UI components to their current values.

    Returns:
        Whatever ``_vamp`` returns in interactive mode.
    """
    return _vamp(**_collect_vamp_args(data), api=False)


def api_vamp(data):
    """Click handler for the hidden API button: run ``_vamp`` with ``api=True``.

    Args:
        data: dict mapping UI components to their current values.

    Returns:
        Whatever ``_vamp`` returns in API mode.
    """
    return _vamp(**_collect_vamp_args(data), api=True)
259
 
 
260
 
261
def harp_vamp(input_audio,
              periodic_p,
              n_mask_codebooks,
              pitch_shift_amt,
              win_dur,
              num_feedback_steps):
    """Entry point with a reduced control set; delegates to ``_vamp``.

    Only six controls are taken from the caller. Every other ``_vamp``
    argument is pinned to a fixed value, and the call is made with
    ``api=True``.

    Args:
        input_audio: path of the audio file to process.
        periodic_p: periodic prompt value.
        n_mask_codebooks: upper codebook mask value.
        pitch_shift_amt: pitch shift applied before generation.
        win_dur: window duration.
        num_feedback_steps: number of feedback steps.

    Returns:
        The value returned by ``_vamp`` in API mode.
    """
    # Controls supplied by the caller.
    exposed_settings = {
        "input_audio": input_audio,
        "periodic_p": periodic_p,
        "n_mask_codebooks": n_mask_codebooks,
        "pitch_shift_amt": pitch_shift_amt,
        "win_dur": win_dur,
        "num_feedback_steps": num_feedback_steps,
    }

    # Everything else is fixed for this entry point.
    fixed_settings = {
        "seed": 0,
        "model_choice": "default",
        "p2": 0,
        "n_mask_codebooks_2": 0,
        "rand_mask_intensity": 1.0,
        "prefix_s": 0.0,
        "suffix_s": 0.0,
        "periodic_w": 1,
        "onset_mask_width": 0,
        "dropout": 0.0,
        "masktemp": 1.5,
        "sampletemp": 1.0,
        "typical_filtering": True,
        "typical_mass": 0.15,
        "typical_min_tokens": 64,
        "top_p": 0.9,
        "sample_cutoff": 1.0,
        "stretch_factor": 1.0,
        "api": True,
    }

    return _vamp(**exposed_settings, **fixed_settings)
294
+
295
+
296
 
297
  with gr.Blocks() as demo:
 
298
  with gr.Row():
299
  with gr.Column():
 
 
 
 
 
 
 
300
  manual_audio_upload = gr.File(
301
+ label=f"upload some audio (will be randomly trimmed to max of 100s)",
302
  file_types=["audio"]
303
  )
304
  load_example_audio_button = gr.Button("or load example audio")
 
309
  type="filepath",
310
  )
311
 
312
+ audio_mask = gr.Audio(
313
+ label="audio mask (listen to this to hear the mask hints)",
314
+ interactive=False,
315
+ type="filepath",
316
+ )
317
 
318
  # connect widgets
319
  load_example_audio_button.click(
 
328
  outputs=[ input_audio]
329
  )
330
 
331
+
332
+
333
  # mask settings
334
  with gr.Column():
335
+ with gr.Accordion("manual controls", open=True):
336
  periodic_p = gr.Slider(
337
  label="periodic prompt",
338
+ minimum=0,
339
+ maximum=128,
340
+ step=1,
341
+ value=3,
342
+ )
343
+ p2 = gr.Slider(
344
+ label="periodic prompt 2 (0 - same as p1, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
345
+ minimum=0,
346
+ maximum=128,
347
  step=1,
348
+ value=0,
349
+ )
350
+
351
+ onset_mask_width = gr.Slider(
352
+ label="onset mask width (multiplies with the periodic mask, 1 step ~= 10milliseconds) ",
353
+ minimum=0,
354
+ maximum=100,
355
+ step=1,
356
+ value=0,
357
  )
358
 
359
  n_mask_codebooks = gr.Slider(
360
+ label="compression prompt ",
361
+ value=3,
362
  minimum=0,
363
+ maximum=14,
 
364
  step=1,
365
  )
366
+ n_mask_codebooks_2 = gr.Number(
367
+ label="compression prompt 2 via linear interpolation (0 == constant)",
368
+ value=0,
369
+ )
370
 
371
+ with gr.Accordion("extras ", open=False):
372
+ pitch_shift_amt = gr.Slider(
373
+ label="pitch shift amount (semitones)",
374
+ minimum=-12,
375
+ maximum=12,
376
+ step=1,
377
+ value=0,
378
  )
379
+
380
+ stretch_factor = gr.Slider(
381
+ label="time stretch factor",
382
+ minimum=0,
383
+ maximum=64,
384
+ step=1,
385
+ value=1,
386
+ )
387
+
388
+ rand_mask_intensity = gr.Slider(
389
+ label="random mask intensity. (If this is less than 1, scatters prompts throughout the audio, should be between 0.9 and 1.0)",
390
+ minimum=0.0,
391
+ maximum=1.0,
392
+ value=1.0
393
+ )
394
+
395
+ periodic_w = gr.Slider(
396
+ label="periodic prompt width (steps, 1 step ~= 10milliseconds)",
397
  minimum=1,
398
+ maximum=20,
399
+ step=1,
400
+ value=1,
401
+ )
402
+
403
+ with gr.Accordion("prefix/suffix prompts", open=True):
404
+ prefix_s = gr.Slider(
405
+ label="prefix hint length (seconds)",
406
+ minimum=0.0,
407
+ maximum=10.0,
408
+ value=0.0
409
+ )
410
+ suffix_s = gr.Slider(
411
+ label="suffix hint length (seconds)",
412
+ minimum=0.0,
413
+ maximum=10.0,
414
+ value=0.0
415
  )
416
 
417
+ masktemp = gr.Slider(
418
+ label="mask temperature",
419
+ minimum=0.0,
420
+ maximum=100.0,
421
+ value=1.5
422
+ )
423
+ sampletemp = gr.Slider(
424
+ label="sample temperature",
425
+ minimum=0.1,
426
+ maximum=10.0,
427
+ value=1.0,
428
+ step=0.001
429
+ )
430
+
431
+
432
+
433
+ with gr.Accordion("sampling settings", open=False):
434
+ top_p = gr.Slider(
435
+ label="top p (0.0 = off)",
436
+ minimum=0.0,
437
+ maximum=1.0,
438
+ value=0.9
439
+ )
440
+ typical_filtering = gr.Checkbox(
441
+ label="typical filtering ",
442
+ value=True
443
+ )
444
+ typical_mass = gr.Slider(
445
+ label="typical mass (should probably stay between 0.1 and 0.5)",
446
+ minimum=0.01,
447
+ maximum=0.99,
448
+ value=0.15
449
+ )
450
+ typical_min_tokens = gr.Slider(
451
+ label="typical min tokens (should probably stay between 1 and 256)",
452
+ minimum=1,
453
+ maximum=256,
454
  step=1,
455
+ value=64
456
  )
457
+ sample_cutoff = gr.Slider(
458
+ label="sample cutoff",
459
+ minimum=0.0,
460
+ maximum=1.0,
461
+ value=1.0,
462
+ step=0.01
463
+ )
464
+
465
+ dropout = gr.Slider(
466
+ label="mask dropout",
467
+ minimum=0.0,
468
+ maximum=1.0,
469
+ step=0.01,
470
+ value=0.0
471
+ )
472
 
473
 
474
+ seed = gr.Number(
475
+ label="seed (0 for random)",
476
+ value=0,
477
+ precision=0,
478
+ )
479
+
480
+
481
+
482
+ # mask settings
483
+ with gr.Column():
484
+
485
+ model_choice = gr.Dropdown(
486
+ label="model choice",
487
+ choices=list(MODEL_CHOICES.keys()),
488
+ value="default",
489
+ visible=True
490
+ )
491
+
492
+ num_feedback_steps = gr.Slider(
493
+ label="number of feedback steps (each one takes a while)",
494
+ minimum=1,
495
+ maximum=16,
496
+ step=1,
497
+ value=1
498
+ )
499
+
500
+ win_dur= gr.Slider(
501
+ label="window duration (seconds)",
502
+ minimum=2,
503
+ maximum=10,
504
+ value=6)
505
+
506
+
507
+ vamp_button = gr.Button("generate (vamp)!!!")
508
+ maskimg = gr.Image(
509
+ label="mask image",
510
  interactive=False,
511
  type="filepath"
512
  )
513
+ out1 = gr.Audio(
514
+ label="output audio 1",
515
+ interactive=False,
516
+ type="filepath"
517
+ )
518
+ out2 = gr.Audio(
519
+ label="output audio 2",
520
+ interactive=False,
521
+ type="filepath"
522
+ )
523
+ out3 = gr.Audio(
524
+ label="output audio 3",
525
+ interactive=False,
526
+ type="filepath"
527
+ )
528
+ out4 = gr.Audio(
529
+ label="output audio 4",
530
+ interactive=False,
531
+ type="filepath"
532
+ )
533
+
534
+ thank_you = gr.Markdown("")
535
+
536
+ # download all the outputs
537
+ download = gr.File(type="file", label="download outputs")
538
+
539
 
540
  _inputs = {
541
  input_audio,
542
+ masktemp,
543
  sampletemp,
544
+ top_p,
545
+ prefix_s, suffix_s,
546
+ rand_mask_intensity,
547
+ periodic_p, periodic_w,
548
+ dropout,
549
+ stretch_factor,
550
+ onset_mask_width,
551
+ typical_filtering,
552
+ typical_mass,
553
+ typical_min_tokens,
554
+ seed,
555
+ model_choice,
556
  n_mask_codebooks,
557
+ pitch_shift_amt,
558
+ sample_cutoff,
559
+ num_feedback_steps,
560
+ p2,
561
+ n_mask_codebooks_2,
562
+ win_dur
563
  }
564
 
565
  # connect widgets
566
  vamp_button.click(
567
+ fn=vamp,
568
  inputs=_inputs,
569
+ outputs=[out1, out2, out3, out4, audio_mask, download, maskimg],
570
+ )
571
+
572
+ api_vamp_button = gr.Button("api vamp", visible=False)
573
+ api_vamp_button.click(
574
+ fn=api_vamp,
575
+ inputs=_inputs,
576
+ outputs=[out1],
577
+ api_name="vamp"
578
  )
579
 
580
+ from pyharp import ModelCard, build_endpoint
581
+
582
+ model_card = ModelCard(
583
+ name="salad bowl",
584
+ description="sounds",
585
+ author="hugo flores garcía",
586
+ tags=["generative","sound"],
587
+ )
588
 
589
  build_endpoint(
590
+ inputs=[
591
+ input_audio,
592
+ periodic_p,
593
+ n_mask_codebooks,
594
+ pitch_shift_amt,
595
+ win_dur,
596
+ num_feedback_steps
597
+ ],
598
+ output=out1,
599
+ process_fn=harp_vamp,
600
+ card=model_card
601
  )
602
 
603
+
604
+ try:
605
+ demo.queue()
606
+ demo.launch(share=True)
607
+ except KeyboardInterrupt:
608
+ shutil.rmtree("gradio-outputs", ignore_errors=True)
609
+ raise