hugo flores garcia
committed on
Commit
•
3cd737d
1
Parent(s):
b90b198
stuff
Browse files
app.py
CHANGED
@@ -73,14 +73,8 @@ def load_example_audio():
|
|
73 |
return "./assets/example.wav"
|
74 |
|
75 |
|
76 |
-
def _vamp(
|
77 |
-
# remove any old files in the output directory (from previous runs)
|
78 |
-
shutil.rmtree(OUT_DIR)
|
79 |
-
OUT_DIR.mkdir()
|
80 |
|
81 |
-
out_dir = OUT_DIR / str(uuid.uuid4())
|
82 |
-
out_dir.mkdir()
|
83 |
-
sig = at.AudioSignal(data[input_audio])
|
84 |
sig = interface.preprocess(sig)
|
85 |
|
86 |
loudness = sig.loudness()
|
@@ -103,16 +97,10 @@ def _vamp(data, return_mask=False):
|
|
103 |
mask = pmask.codebook_mask(mask, int(data[n_mask_codebooks]))
|
104 |
|
105 |
print(f"sampletemp {data[sampletemp]}")
|
106 |
-
print(f"top_p {data[top_p]}")
|
107 |
print(f"num_steps {data[num_steps]}")
|
108 |
print(f"periodic_p {data[periodic_p]}")
|
109 |
print(f"seed {data[seed]}")
|
110 |
|
111 |
-
|
112 |
-
_top_p = data[top_p] if data[top_p] > 0 else None
|
113 |
-
# save the mask as a txt file
|
114 |
-
np.savetxt(out_dir / "mask.txt", mask[:,0,:].long().cpu().numpy())
|
115 |
-
|
116 |
_seed = data[seed] if data[seed] > 0 else None
|
117 |
print(f"processing coarse...")
|
118 |
zv, mask_z = interface.coarse_vamp(
|
@@ -122,41 +110,44 @@ def _vamp(data, return_mask=False):
|
|
122 |
mask_temperature=1.5*10,
|
123 |
sampling_temperature=data[sampletemp],
|
124 |
return_mask=True,
|
125 |
-
top_p=
|
126 |
gen_fn=interface.coarse.generate,
|
127 |
seed=_seed,
|
128 |
sample_cutoff=1.0,
|
129 |
)
|
130 |
|
131 |
-
|
132 |
-
|
133 |
-
zv
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
)
|
142 |
|
143 |
sig = interface.to_signal(zv).cpu()
|
144 |
print("done")
|
|
|
145 |
|
146 |
-
sig.write(out_dir / "output.wav")
|
147 |
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
|
|
|
|
|
|
|
|
|
|
154 |
|
155 |
-
def vamp(data):
|
156 |
-
return _vamp(data, return_mask=True)
|
157 |
|
158 |
-
|
159 |
-
|
|
|
160 |
|
161 |
|
162 |
with gr.Blocks() as demo:
|
@@ -228,16 +219,16 @@ with gr.Blocks() as demo:
|
|
228 |
label="number of steps (should normally be between 12 and 36)",
|
229 |
minimum=1,
|
230 |
maximum=128,
|
231 |
-
step=
|
232 |
-
value=
|
233 |
)
|
234 |
|
235 |
-
|
236 |
-
label="
|
237 |
-
minimum=
|
238 |
-
maximum=
|
239 |
-
step=
|
240 |
-
value=
|
241 |
)
|
242 |
|
243 |
|
@@ -247,10 +238,8 @@ with gr.Blocks() as demo:
|
|
247 |
precision=0,
|
248 |
)
|
249 |
|
250 |
-
# mask settings
|
251 |
-
with gr.Column():
|
252 |
|
253 |
-
vamp_button = gr.Button("
|
254 |
output_audio = gr.Audio(
|
255 |
label="output audio",
|
256 |
interactive=False,
|
@@ -262,23 +251,22 @@ with gr.Blocks() as demo:
|
|
262 |
num_steps,
|
263 |
sampletemp,
|
264 |
periodic_p,
|
265 |
-
dropout,
|
266 |
seed,
|
267 |
n_mask_codebooks,
|
268 |
}
|
269 |
|
270 |
# connect widgets
|
271 |
vamp_button.click(
|
272 |
-
fn=
|
273 |
inputs=_inputs,
|
274 |
outputs=[output_audio],
|
275 |
)
|
276 |
|
277 |
|
278 |
build_endpoint(
|
279 |
-
inputs=
|
280 |
output=output_audio,
|
281 |
-
process_fn=
|
282 |
card=ModelCard(
|
283 |
name="vampnet",
|
284 |
description="turn your music into NES music!! quick!! NOTE: vampnet's has a maximum context length of 10 seconds. Please split all audio clips into 10 second chunks, or processing will result in an error. ",
|
|
|
73 |
return "./assets/example.wav"
|
74 |
|
75 |
|
76 |
+
def _vamp(sig, data):
|
|
|
|
|
|
|
77 |
|
|
|
|
|
|
|
78 |
sig = interface.preprocess(sig)
|
79 |
|
80 |
loudness = sig.loudness()
|
|
|
97 |
mask = pmask.codebook_mask(mask, int(data[n_mask_codebooks]))
|
98 |
|
99 |
print(f"sampletemp {data[sampletemp]}")
|
|
|
100 |
print(f"num_steps {data[num_steps]}")
|
101 |
print(f"periodic_p {data[periodic_p]}")
|
102 |
print(f"seed {data[seed]}")
|
103 |
|
|
|
|
|
|
|
|
|
|
|
104 |
_seed = data[seed] if data[seed] > 0 else None
|
105 |
print(f"processing coarse...")
|
106 |
zv, mask_z = interface.coarse_vamp(
|
|
|
110 |
mask_temperature=1.5*10,
|
111 |
sampling_temperature=data[sampletemp],
|
112 |
return_mask=True,
|
113 |
+
top_p=0.85,
|
114 |
gen_fn=interface.coarse.generate,
|
115 |
seed=_seed,
|
116 |
sample_cutoff=1.0,
|
117 |
)
|
118 |
|
119 |
+
print(f"processing coarse to fine...")
|
120 |
+
zv = interface.coarse_to_fine(
|
121 |
+
zv,
|
122 |
+
mask_temperature=1.5*10,
|
123 |
+
sampling_temperature=data[sampletemp],
|
124 |
+
mask=mask,
|
125 |
+
sampling_steps=data[num_steps] // 2,
|
126 |
+
sample_cutoff=1.0,
|
127 |
+
seed=_seed,
|
128 |
+
)
|
|
|
129 |
|
130 |
sig = interface.to_signal(zv).cpu()
|
131 |
print("done")
|
132 |
+
return sig
|
133 |
|
|
|
134 |
|
135 |
+
def process_fn(data):
|
136 |
+
# remove any old files in the output directory (from previous runs)
|
137 |
+
shutil.rmtree(OUT_DIR)
|
138 |
+
OUT_DIR.mkdir()
|
139 |
+
|
140 |
+
out_dir = OUT_DIR / str(uuid.uuid4())
|
141 |
+
out_dir.mkdir()
|
142 |
+
sig = at.AudioSignal(data[input_audio])
|
143 |
+
|
144 |
+
for _pass in range(data[num_passes]):
|
145 |
+
pass
|
146 |
|
|
|
|
|
147 |
|
148 |
+
sig.write(out_dir / "output.wav")
|
149 |
+
|
150 |
+
return sig.path_to_file
|
151 |
|
152 |
|
153 |
with gr.Blocks() as demo:
|
|
|
219 |
label="number of steps (should normally be between 12 and 36)",
|
220 |
minimum=1,
|
221 |
maximum=128,
|
222 |
+
step=6,
|
223 |
+
value=24
|
224 |
)
|
225 |
|
226 |
+
num_passes = gr.Slider(
|
227 |
+
label="number of passes (more passes = more time, but better results)",
|
228 |
+
minimum=2,
|
229 |
+
maximum=8,
|
230 |
+
step=1,
|
231 |
+
value=4
|
232 |
)
|
233 |
|
234 |
|
|
|
238 |
precision=0,
|
239 |
)
|
240 |
|
|
|
|
|
241 |
|
242 |
+
vamp_button = gr.Button("nes, quick!!!!!")
|
243 |
output_audio = gr.Audio(
|
244 |
label="output audio",
|
245 |
interactive=False,
|
|
|
251 |
num_steps,
|
252 |
sampletemp,
|
253 |
periodic_p,
|
|
|
254 |
seed,
|
255 |
n_mask_codebooks,
|
256 |
}
|
257 |
|
258 |
# connect widgets
|
259 |
vamp_button.click(
|
260 |
+
fn=process_fn,
|
261 |
inputs=_inputs,
|
262 |
outputs=[output_audio],
|
263 |
)
|
264 |
|
265 |
|
266 |
build_endpoint(
|
267 |
+
inputs=_inputs,
|
268 |
output=output_audio,
|
269 |
+
process_fn=process_fn,
|
270 |
card=ModelCard(
|
271 |
name="vampnet",
|
272 |
description="turn your music into NES music!! quick!! NOTE: vampnet's has a maximum context length of 10 seconds. Please split all audio clips into 10 second chunks, or processing will result in an error. ",
|