Spaces:
Sleeping
Sleeping
Hugo Flores Garcia
committed on
Commit
·
bfacd00
1
Parent(s):
e251e23
pin audiotools version!
Browse files
- scripts/utils/process_folder-c2f.py +0 -124
- setup.py +1 -1
scripts/utils/process_folder-c2f.py
DELETED
@@ -1,124 +0,0 @@
|
|
1 |
-
|
2 |
-
from audiotools import AudioSignal
|
3 |
-
import torch
|
4 |
-
from pathlib import Path
|
5 |
-
import argbind
|
6 |
-
from tqdm import tqdm
|
7 |
-
import random
|
8 |
-
|
9 |
-
from typing import List
|
10 |
-
|
11 |
-
from collections import defaultdict
|
12 |
-
|
13 |
-
def coarse2fine_infer(
|
14 |
-
signal,
|
15 |
-
model,
|
16 |
-
vqvae,
|
17 |
-
device,
|
18 |
-
):
|
19 |
-
output = {}
|
20 |
-
w = signal
|
21 |
-
w = w.to(device)
|
22 |
-
z = vqvae.encode(w.audio_data, w.sample_rate)["codes"]
|
23 |
-
|
24 |
-
model.to(device)
|
25 |
-
output["reconstructed"] = model.to_signal(z, vqvae).cpu()
|
26 |
-
|
27 |
-
# make a full mask
|
28 |
-
mask = torch.ones_like(z)
|
29 |
-
mask[:, :model.n_conditioning_codebooks, :] = 0
|
30 |
-
|
31 |
-
output["sampled"] = model.sample(
|
32 |
-
codec=vqvae,
|
33 |
-
time_steps=z.shape[-1],
|
34 |
-
sampling_steps=12,
|
35 |
-
start_tokens=z,
|
36 |
-
mask=mask,
|
37 |
-
temperature=0.85,
|
38 |
-
top_k=None,
|
39 |
-
sample="gumbel",
|
40 |
-
typical_filtering=True,
|
41 |
-
return_signal=True
|
42 |
-
).cpu()
|
43 |
-
|
44 |
-
output["argmax"] = model.sample(
|
45 |
-
codec=vqvae,
|
46 |
-
time_steps=z.shape[-1],
|
47 |
-
sampling_steps=1,
|
48 |
-
start_tokens=z,
|
49 |
-
mask=mask,
|
50 |
-
temperature=1.0,
|
51 |
-
top_k=None,
|
52 |
-
sample="argmax",
|
53 |
-
typical_filtering=True,
|
54 |
-
return_signal=True
|
55 |
-
).cpu()
|
56 |
-
|
57 |
-
return output
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
@argbind.bind(without_prefix=True)
|
62 |
-
def main(
|
63 |
-
sources=[
|
64 |
-
"/data/spotdl/audio/val", "/data/spotdl/audio/test"
|
65 |
-
],
|
66 |
-
exp_name="noise_mode",
|
67 |
-
model_paths=[
|
68 |
-
"runs/c2f-exp-03.22.23/ckpt/mask/epoch=400/vampnet/weights.pth",
|
69 |
-
"runs/c2f-exp-03.22.23/ckpt/random/epoch=400/vampnet/weights.pth",
|
70 |
-
],
|
71 |
-
model_keys=[
|
72 |
-
"mask",
|
73 |
-
"random",
|
74 |
-
],
|
75 |
-
vqvae_path: str = "runs/codec-ckpt/codec.pth",
|
76 |
-
device: str = "cuda",
|
77 |
-
output_dir: str = ".",
|
78 |
-
max_excerpts: int = 5000,
|
79 |
-
duration: float = 3.0,
|
80 |
-
):
|
81 |
-
from vampnet.modules.transformer import VampNet
|
82 |
-
from lac.model.lac import LAC
|
83 |
-
|
84 |
-
models = {
|
85 |
-
k: VampNet.load(p) for k, p in zip(model_keys, model_paths)
|
86 |
-
}
|
87 |
-
for model in models.values():
|
88 |
-
model.eval()
|
89 |
-
print(f"Loaded {len(models)} models.")
|
90 |
-
|
91 |
-
vqvae = LAC.load(vqvae_path)
|
92 |
-
vqvae.to(device)
|
93 |
-
vqvae.eval()
|
94 |
-
print("Loaded VQVAE.")
|
95 |
-
|
96 |
-
output_dir = Path(output_dir) / f"{exp_name}-samples"
|
97 |
-
|
98 |
-
from audiotools.data.datasets import AudioLoader, AudioDataset
|
99 |
-
|
100 |
-
loader = AudioLoader(sources=sources)
|
101 |
-
dataset = AudioDataset(loader,
|
102 |
-
sample_rate=vqvae.sample_rate,
|
103 |
-
duration=duration,
|
104 |
-
n_examples=max_excerpts,
|
105 |
-
without_replacement=True,
|
106 |
-
)
|
107 |
-
for i in tqdm(range(max_excerpts)):
|
108 |
-
sig = dataset[i]["signal"]
|
109 |
-
sig.resample(vqvae.sample_rate).normalize(-24).ensure_max_of_audio(1.0)
|
110 |
-
|
111 |
-
for model_key, model in models.items():
|
112 |
-
out = coarse2fine_infer(sig, model, vqvae, device)
|
113 |
-
out_dir = output_dir / model_key / Path(sig.path_to_file).stem
|
114 |
-
out_dir.mkdir(parents=True, exist_ok=True)
|
115 |
-
for k, s in out.items():
|
116 |
-
s.write(out_dir / f"{k}.wav")
|
117 |
-
|
118 |
-
|
119 |
-
if __name__ == "__main__":
|
120 |
-
args = argbind.parse_args()
|
121 |
-
|
122 |
-
with argbind.scope(args):
|
123 |
-
main()
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
setup.py
CHANGED
@@ -28,7 +28,7 @@ setup(
|
|
28 |
install_requires=[
|
29 |
"torch",
|
30 |
"argbind>=0.3.2",
|
31 |
-
"audiotools @ git+https://github.com/descriptinc/audiotools.git",
|
32 |
"dac @ git+https://github.com/descriptinc/descript-audio-codec.git",
|
33 |
"gradio",
|
34 |
"tensorboardX",
|
|
|
28 |
install_requires=[
|
29 |
"torch",
|
30 |
"argbind>=0.3.2",
|
31 |
+
"audiotools @ git+https://github.com/descriptinc/audiotools.git@f35914b5b3c6f1bf589cd09481478d741538828e",
|
32 |
"dac @ git+https://github.com/descriptinc/descript-audio-codec.git",
|
33 |
"gradio",
|
34 |
"tensorboardX",
|