Hugo Flores Garcia committed on
Commit
bccd2ea
1 Parent(s): 3419098
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +190 -0
  2. LICENSE +21 -0
  3. README.md +113 -6
  4. app.py +678 -0
  5. assets/example.wav +0 -0
  6. conf/c2f.yml +14 -0
  7. conf/generated/bbc-humans/c2f.yml +15 -0
  8. conf/generated/bbc-humans/coarse.yml +8 -0
  9. conf/generated/bbc-humans/interface.yml +6 -0
  10. conf/generated/boleros/c2f.yml +15 -0
  11. conf/generated/boleros/coarse.yml +8 -0
  12. conf/generated/boleros/interface.yml +6 -0
  13. conf/generated/bowl/c2f.yml +16 -0
  14. conf/generated/bowl/coarse.yml +9 -0
  15. conf/generated/bowl/interface.yml +7 -0
  16. conf/generated/breaks-steps/interface.yml +8 -0
  17. conf/generated/choir/interface.yml +9 -0
  18. conf/generated/earlymachines/c2f.yml +15 -0
  19. conf/generated/earlymachines/coarse.yml +8 -0
  20. conf/generated/earlymachines/interface.yml +8 -0
  21. conf/generated/funk/c2f.yml +15 -0
  22. conf/generated/funk/coarse.yml +8 -0
  23. conf/generated/funk/interface.yml +8 -0
  24. conf/generated/ismir-birds/c2f.yml +15 -0
  25. conf/generated/ismir-birds/coarse.yml +8 -0
  26. conf/generated/ismir-birds/interface.yml +8 -0
  27. conf/generated/ismir-machines/c2f.yml +15 -0
  28. conf/generated/ismir-machines/coarse.yml +8 -0
  29. conf/generated/ismir-machines/interface.yml +8 -0
  30. conf/generated/machines/c2f.yml +15 -0
  31. conf/generated/machines/coarse.yml +8 -0
  32. conf/generated/machines/interface.yml +8 -0
  33. conf/generated/musdb/c2f.yml +40 -0
  34. conf/generated/musdb/coarse.yml +31 -0
  35. conf/generated/musdb/interface.yml +8 -0
  36. conf/generated/n64/c2f.yml +15 -0
  37. conf/generated/n64/coarse.yml +8 -0
  38. conf/generated/n64/interface.yml +6 -0
  39. conf/generated/natural-sounds/c2f.yml +28 -0
  40. conf/generated/natural-sounds/coarse.yml +21 -0
  41. conf/generated/natural-sounds/interface.yml +7 -0
  42. conf/generated/nes/c2f.yml +15 -0
  43. conf/generated/nes/coarse.yml +8 -0
  44. conf/generated/nes/interface.yml +6 -0
  45. conf/generated/nyc-subway/c2f.yml +15 -0
  46. conf/generated/nyc-subway/coarse.yml +8 -0
  47. conf/generated/nyc-subway/interface.yml +8 -0
  48. conf/generated/ocean-waves/c2f.yml +15 -0
  49. conf/generated/ocean-waves/coarse.yml +8 -0
  50. conf/generated/ocean-waves/interface.yml +8 -0
.gitignore ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/env.sh
108
+ venv/
109
+ env.bak/
110
+ venv.bak/
111
+
112
+ # Spyder project settings
113
+ .spyderproject
114
+ .spyproject
115
+
116
+ # Rope project settings
117
+ .ropeproject
118
+
119
+ # mkdocs documentation
120
+ /site
121
+
122
+ # mypy
123
+ .mypy_cache/
124
+ .dmypy.json
125
+ dmypy.json
126
+
127
+ # Pyre type checker
128
+ .pyre/
129
+
130
+ # Files created by experiments
131
+ output/
132
+ snapshot/
133
+ *.m4a
134
+ notebooks/scratch.ipynb
135
+ notebooks/inspect.ipynb
136
+ notebooks/effects.ipynb
137
+ notebooks/*.ipynb
138
+ notebooks/*.gif
139
+ notebooks/*.wav
140
+ notebooks/*.mp4
141
+ *runs/
142
+ boards/
143
+ samples/
144
+ *.ipynb
145
+
146
+ results.json
147
+ metrics.csv
148
+ mprofile_*
149
+ mem.png
150
+
151
+ results/
152
+ mprofile*
153
+ *.png
154
+ # do not ignore the test wav file
155
+ !tests/audio/short_test_audio.wav
156
+ !tests/audio/output.wav
157
+ */.DS_Store
158
+ .DS_Store
159
+ env.sh
160
+ _codebraid/
161
+ **/*.html
162
+ **/*.exec.md
163
+ flagged/
164
+ log.txt
165
+ ckpt/
166
+ .syncthing*
167
+ tests/assets/
168
+ archived/
169
+
170
+ scratch/
171
+
172
+ runs-archive
173
+ lyrebird-audiotools
174
+ lyrebird-audio-codec
175
+ samples-*/**
176
+
177
+ gradio-outputs/
178
+ samples*/
179
+ models-all/
180
+ models.zip
181
+ .git-old
182
+
183
+
184
+
185
+ gtzan.zip
186
+ .gtzan_emb_cache
187
+
188
+
189
+ data/
190
+ data
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Hugo Flores García and Prem Seetharaman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,120 @@
1
  ---
2
  title: Salad Bowl
3
- emoji:
4
- colorFrom: pink
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.12.0
8
  app_file: app.py
9
  pinned: false
10
- license: cc-by-nc-sa-4.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Salad Bowl
3
+ emoji: 🥗
4
+ colorFrom: yellow
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 4.11.0
8
  app_file: app.py
9
  pinned: false
10
+ license: cc-by-nc-4.0
11
  ---
12
 
13
+ # VampNet
14
+
15
+ This repository contains recipes for training generative music models on top of the Descript Audio Codec.
16
+
17
+ ## try `unloop`
18
+ you can try vampnet in a co-creative looper called unloop. see this link: https://github.com/hugofloresgarcia/unloop
19
+
20
+ # Setting up
21
+
22
+ **Requires Python 3.9**.
23
+
24
+ you'll need a Python 3.9 environment to run VampNet. This is due to a [known issue with madmom](https://github.com/hugofloresgarcia/vampnet/issues/15).
25
+
26
+ (for example, using conda)
27
+ ```bash
28
+ conda create -n vampnet python=3.9
29
+ conda activate vampnet
30
+ ```
31
+
32
+
33
+ install VampNet
34
+
35
+ ```bash
36
+ git clone https://github.com/hugofloresgarcia/vampnet.git
37
+ pip install -e ./vampnet
38
+ ```
39
+
40
+ ## A note on argbind
41
+ This repository relies on [argbind](https://github.com/pseeth/argbind) to manage CLIs and config files.
42
+ Config files are stored in the `conf/` folder.
43
+
44
+ ## Getting the Pretrained Models
45
+
46
+ ### Licensing for Pretrained Models:
47
+ The weights for the models are licensed [`CC BY-NC-SA 4.0`](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.ml). Likewise, any VampNet models fine-tuned on the pretrained models are also licensed [`CC BY-NC-SA 4.0`](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.ml).
48
+
49
+ Download the pretrained models from [this link](https://zenodo.org/record/8136629). Then, extract the models to the `models/` folder.
50
+
51
+
52
+ # Usage
53
+
54
+ ## Launching the Gradio Interface
55
+ You can launch a gradio UI to play with vampnet.
56
+
57
+ ```bash
58
+ python app.py --args.load conf/interface.yml --Interface.device cuda
59
+ ```
60
+
61
+ # Training / Fine-tuning
62
+
63
+ ## Training a model
64
+
65
+ To train a model, run the following script:
66
+
67
+ ```bash
68
+ python scripts/exp/train.py --args.load conf/vampnet.yml --save_path /path/to/checkpoints
69
+ ```
70
+
71
+ for multi-gpu training, use torchrun:
72
+
73
+ ```bash
74
+ torchrun --nproc_per_node gpu scripts/exp/train.py --args.load conf/vampnet.yml --save_path path/to/ckpt
75
+ ```
76
+
77
+ You can edit `conf/vampnet.yml` to change the dataset paths or any training hyperparameters.
78
+
79
+ For coarse2fine models, you can use `conf/c2f.yml` as a starting configuration.
80
+
81
+ See `python scripts/exp/train.py -h` for a list of options.
82
+
83
+ ## Debugging training
84
+
85
+ To debug training, it's easier to debug with 1 gpu and 0 workers
86
+
87
+ ```bash
88
+ CUDA_VISIBLE_DEVICES=0 python -m pdb scripts/exp/train.py --args.load conf/vampnet.yml --save_path /path/to/checkpoints --num_workers 0
89
+ ```
90
+
91
+ ## Fine-tuning
92
+ To fine-tune a model, use the script in `scripts/exp/fine_tune.py` to generate 3 configuration files: `c2f.yml`, `coarse.yml`, and `interface.yml`.
93
+ The first two are used to fine-tune the coarse and fine models, respectively. The last one is used to launch the gradio interface.
94
+
95
+ ```bash
96
+ python scripts/exp/fine_tune.py "/path/to/audio1.mp3 /path/to/audio2/ /path/to/audio3.wav" <fine_tune_name>
97
+ ```
98
+
99
+ This will create a folder under `conf/<fine_tune_name>/` with the 3 configuration files.
100
+
101
+ The save_paths will be set to `runs/<fine_tune_name>/coarse` and `runs/<fine_tune_name>/c2f`.
102
+
103
+ launch the coarse job:
104
+ ```bash
105
+ python scripts/exp/train.py --args.load conf/generated/<fine_tune_name>/coarse.yml
106
+ ```
107
+
108
+ this will save the coarse model to `runs/<fine_tune_name>/coarse/ckpt/best/`.
109
+
110
+ launch the c2f job:
111
+ ```bash
112
+ python scripts/exp/train.py --args.load conf/generated/<fine_tune_name>/c2f.yml
113
+ ```
114
+
115
+ launch the interface:
116
+ ```bash
117
+ python app.py --args.load conf/generated/<fine_tune_name>/interface.yml
118
+ ```
119
+
120
+
app.py ADDED
@@ -0,0 +1,678 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import yaml
3
+ import uuid
4
+
5
+ import numpy as np
6
+ import audiotools as at
7
+ import argbind
8
+ import shutil
9
+ import torch
10
+
11
+ import gradio as gr
12
+ from vampnet.interface import Interface
13
+ from vampnet import mask as pmask
14
+
15
+ device = "cuda" if torch.cuda.is_available() else "cpu"
16
+
17
+ interface = Interface(
18
+ device=device,
19
+ coarse_ckpt="models/vampnet/coarse.pth",
20
+ coarse2fine_ckpt="models/vampnet/c2f.pth",
21
+ codec_ckpt="models/vampnet/codec.pth",
22
+ )
23
+
24
+ # populate the model choices with any interface.yml files in the generated confs
25
+ MODEL_CHOICES = {
26
+ "default": {
27
+ "Interface.coarse_ckpt": str(interface.coarse_path),
28
+ "Interface.coarse2fine_ckpt": str(interface.c2f_path),
29
+ "Interface.codec_ckpt": str(interface.codec_path),
30
+ }
31
+ }
32
+ generated_confs = Path("conf/generated")
33
+ for conf_file in generated_confs.glob("*/interface.yml"):
34
+ with open(conf_file) as f:
35
+ _conf = yaml.safe_load(f)
36
+ MODEL_CHOICES[conf_file.parent.name] = _conf
37
+
38
+
39
+
40
+ OUT_DIR = Path("gradio-outputs")
41
+ OUT_DIR.mkdir(exist_ok=True, parents=True)
42
+
43
+ def load_audio(file):
44
+ print(file)
45
+ filepath = file.name
46
+ sig = at.AudioSignal.salient_excerpt(
47
+ filepath,
48
+ duration=interface.coarse.chunk_size_s
49
+ )
50
+ sig = interface.preprocess(sig)
51
+
52
+ out_dir = OUT_DIR / "tmp" / str(uuid.uuid4())
53
+ out_dir.mkdir(parents=True, exist_ok=True)
54
+ sig.write(out_dir / "input.wav")
55
+ return sig.path_to_file
56
+
57
+
58
+ def load_example_audio():
59
+ return "./assets/example.wav"
60
+
61
+ from torch_pitch_shift import pitch_shift, get_fast_shifts
62
+ def shift_pitch(signal, interval: int):
63
+ signal.samples = pitch_shift(
64
+ signal.samples,
65
+ shift=interval,
66
+ sample_rate=signal.sample_rate
67
+ )
68
+ return signal
69
+
70
+ def _vamp(data, return_mask=False):
71
+
72
+ # remove any old files in the output directory (from previous runs)
73
+ shutil.rmtree(OUT_DIR)
74
+ OUT_DIR.mkdir()
75
+
76
+ out_dir = OUT_DIR / str(uuid.uuid4())
77
+ out_dir.mkdir()
78
+
79
+ sig = at.AudioSignal(data[input_audio])
80
+ sig = interface.preprocess(sig)
81
+
82
+
83
+ # reload the model if necessary
84
+ interface.reload(
85
+ coarse_ckpt=MODEL_CHOICES[data[model_choice]]["Interface.coarse_ckpt"],
86
+ c2f_ckpt=MODEL_CHOICES[data[model_choice]]["Interface.coarse2fine_ckpt"],
87
+ )
88
+
89
+ loudness = sig.loudness()
90
+ print(f"input loudness is {loudness}")
91
+
92
+ if data[pitch_shift_amt] != 0:
93
+ sig = shift_pitch(sig, data[pitch_shift_amt])
94
+
95
+ z = interface.encode(sig)
96
+
97
+ ncc = data[n_conditioning_codebooks]
98
+
99
+ # build the mask
100
+ mask = pmask.linear_random(z, data[rand_mask_intensity])
101
+ mask = pmask.mask_and(
102
+ mask, pmask.inpaint(
103
+ z,
104
+ interface.s2t(data[prefix_s]),
105
+ interface.s2t(data[suffix_s])
106
+ )
107
+ )
108
+ mask = pmask.mask_and(
109
+ mask, pmask.periodic_mask(
110
+ z,
111
+ data[periodic_p],
112
+ data[periodic_w],
113
+ random_roll=True
114
+ )
115
+ )
116
+ if data[onset_mask_width] > 0:
117
+ mask = pmask.mask_or(
118
+ mask, pmask.onset_mask(sig, z, interface, width=data[onset_mask_width])
119
+ )
120
+ if data[beat_mask_width] > 0:
121
+ beat_mask = interface.make_beat_mask(
122
+ sig,
123
+ after_beat_s=(data[beat_mask_width]/1000),
124
+ mask_upbeats=not data[beat_mask_downbeats],
125
+ )
126
+ mask = pmask.mask_and(mask, beat_mask)
127
+
128
+ # these should be the last two mask ops
129
+ mask = pmask.dropout(mask, data[dropout])
130
+ mask = pmask.codebook_unmask(mask, ncc)
131
+ mask = pmask.codebook_mask(mask, int(data[n_mask_codebooks]))
132
+
133
+ print(f"dropout {data[dropout]}")
134
+ print(f"masktemp {data[masktemp]}")
135
+ print(f"sampletemp {data[sampletemp]}")
136
+ print(f"top_p {data[top_p]}")
137
+ print(f"prefix_s {data[prefix_s]}")
138
+ print(f"suffix_s {data[suffix_s]}")
139
+ print(f"rand_mask_intensity {data[rand_mask_intensity]}")
140
+ print(f"num_steps {data[num_steps]}")
141
+ print(f"periodic_p {data[periodic_p]}")
142
+ print(f"periodic_w {data[periodic_w]}")
143
+ print(f"n_conditioning_codebooks {data[n_conditioning_codebooks]}")
144
+ print(f"use_coarse2fine {data[use_coarse2fine]}")
145
+ print(f"onset_mask_width {data[onset_mask_width]}")
146
+ print(f"beat_mask_width {data[beat_mask_width]}")
147
+ print(f"beat_mask_downbeats {data[beat_mask_downbeats]}")
148
+ print(f"stretch_factor {data[stretch_factor]}")
149
+ print(f"seed {data[seed]}")
150
+ print(f"pitch_shift_amt {data[pitch_shift_amt]}")
151
+ print(f"sample_cutoff {data[sample_cutoff]}")
152
+
153
+
154
+ _top_p = data[top_p] if data[top_p] > 0 else None
155
+ # save the mask as a txt file
156
+ np.savetxt(out_dir / "mask.txt", mask[:,0,:].long().cpu().numpy())
157
+
158
+ _seed = data[seed] if data[seed] > 0 else None
159
+ print(f"processing coarse...")
160
+ zv, mask_z = interface.coarse_vamp(
161
+ z,
162
+ mask=mask,
163
+ sampling_steps=data[num_steps],
164
+ mask_temperature=data[masktemp]*10,
165
+ sampling_temperature=data[sampletemp],
166
+ return_mask=True,
167
+ typical_filtering=data[typical_filtering],
168
+ typical_mass=data[typical_mass],
169
+ typical_min_tokens=data[typical_min_tokens],
170
+ top_p=_top_p,
171
+ gen_fn=interface.coarse.generate,
172
+ seed=_seed,
173
+ sample_cutoff=data[sample_cutoff],
174
+ )
175
+
176
+ if use_coarse2fine:
177
+ print(f"processing coarse to fine...")
178
+ zv = interface.coarse_to_fine(
179
+ zv,
180
+ mask_temperature=data[masktemp]*10,
181
+ sampling_temperature=data[sampletemp],
182
+ mask=mask,
183
+ sampling_steps=data[num_steps] // 2,
184
+ sample_cutoff=data[sample_cutoff],
185
+ seed=_seed,
186
+ )
187
+
188
+ sig = interface.to_signal(zv).cpu()
189
+ print("done")
190
+
191
+ print(f"output loudness is {sig.loudness()}")
192
+ sig = sig.normalize(loudness)
193
+ print(f"normalized loudness is {sig.loudness()}")
194
+ print("\n")
195
+
196
+ sig.write(out_dir / "output.wav")
197
+
198
+ if return_mask:
199
+ mask = interface.to_signal(mask_z).cpu()
200
+ mask.write(out_dir / "mask.wav")
201
+ return sig.path_to_file, mask.path_to_file
202
+ else:
203
+ return sig.path_to_file
204
+
205
+ def vamp(data):
206
+ return _vamp(data, return_mask=True)
207
+
208
+ def api_vamp(data):
209
+ return _vamp(data, return_mask=False)
210
+
211
+ def save_vamp(data):
212
+ out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
213
+ out_dir.mkdir(parents=True, exist_ok=True)
214
+
215
+ sig_in = at.AudioSignal(data[input_audio])
216
+ sig_out = at.AudioSignal(data[output_audio])
217
+
218
+ sig_in.write(out_dir / "input.wav")
219
+ sig_out.write(out_dir / "output.wav")
220
+
221
+ _data = {
222
+ "masktemp": data[masktemp],
223
+ "sampletemp": data[sampletemp],
224
+ "top_p": data[top_p],
225
+ "prefix_s": data[prefix_s],
226
+ "suffix_s": data[suffix_s],
227
+ "rand_mask_intensity": data[rand_mask_intensity],
228
+ "num_steps": data[num_steps],
229
+ "notes": data[notes_text],
230
+ "periodic_period": data[periodic_p],
231
+ "periodic_width": data[periodic_w],
232
+ "n_conditioning_codebooks": data[n_conditioning_codebooks],
233
+ "use_coarse2fine": data[use_coarse2fine],
234
+ "stretch_factor": data[stretch_factor],
235
+ "seed": data[seed],
236
+ "samplecutoff": data[sample_cutoff],
237
+ }
238
+
239
+ # save with yaml
240
+ with open(out_dir / "data.yaml", "w") as f:
241
+ yaml.dump(_data, f)
242
+
243
+ import zipfile
244
+ zip_path = str(out_dir.with_suffix(".zip"))
245
+ with zipfile.ZipFile(zip_path, "w") as zf:
246
+ for file in out_dir.iterdir():
247
+ zf.write(file, file.name)
248
+
249
+ return f"saved! your save code is {out_dir.stem}", zip_path
250
+
251
+
252
+ def harp_vamp(_input_audio, _beat_mask_width, _sampletemp):
253
+
254
+ out_dir = OUT_DIR / str(uuid.uuid4())
255
+ out_dir.mkdir()
256
+ sig = at.AudioSignal(_input_audio)
257
+ sig = interface.preprocess(sig)
258
+
259
+ z = interface.encode(sig)
260
+
261
+ # build the mask
262
+ mask = pmask.linear_random(z, 1.0)
263
+ if _beat_mask_width > 0:
264
+ beat_mask = interface.make_beat_mask(
265
+ sig,
266
+ after_beat_s=(_beat_mask_width/1000),
267
+ )
268
+ mask = pmask.mask_and(mask, beat_mask)
269
+
270
+ # save the mask as a txt file
271
+ zv, mask_z = interface.coarse_vamp(
272
+ z,
273
+ mask=mask,
274
+ sampling_temperature=_sampletemp,
275
+ return_mask=True,
276
+ gen_fn=interface.coarse.generate,
277
+ )
278
+
279
+
280
+ zv = interface.coarse_to_fine(
281
+ zv,
282
+ sampling_temperature=_sampletemp,
283
+ mask=mask,
284
+ )
285
+
286
+ sig = interface.to_signal(zv).cpu()
287
+ print("done")
288
+
289
+ sig.write(out_dir / "output.wav")
290
+
291
+ return sig.path_to_file
292
+
293
+ with gr.Blocks() as demo:
294
+
295
+ with gr.Row():
296
+ with gr.Column():
297
+ gr.Markdown("# VampNet Audio Vamping")
298
+ gr.Markdown("""## Description:
299
+ This is a demo of the VampNet, a generative audio model that transforms the input audio based on the chosen settings.
300
+ You can control the extent and nature of variation with a set of manual controls and presets.
301
+ Use this interface to experiment with different mask settings and explore the audio outputs.
302
+ """)
303
+
304
+ gr.Markdown("""
305
+ ## Instructions:
306
+ 1. You can start by uploading some audio, or by loading the example audio.
307
+ 2. Choose a preset for the vamp operation, or manually adjust the controls to customize the mask settings.
308
+ 3. Click the "generate (vamp)!!!" button to apply the vamp operation. Listen to the output audio.
309
+ 4. Optionally, you can add some notes and save the result.
310
+ 5. You can also use the output as the new input and continue experimenting!
311
+ """)
312
+ with gr.Row():
313
+ with gr.Column():
314
+
315
+
316
+ manual_audio_upload = gr.File(
317
+ label=f"upload some audio (will be randomly trimmed to max of {interface.coarse.chunk_size_s:.2f}s)",
318
+ file_types=["audio"]
319
+ )
320
+ load_example_audio_button = gr.Button("or load example audio")
321
+
322
+ input_audio = gr.Audio(
323
+ label="input audio",
324
+ interactive=False,
325
+ type="filepath",
326
+ )
327
+
328
+ audio_mask = gr.Audio(
329
+ label="audio mask (listen to this to hear the mask hints)",
330
+ interactive=False,
331
+ type="filepath",
332
+ )
333
+
334
+ # connect widgets
335
+ load_example_audio_button.click(
336
+ fn=load_example_audio,
337
+ inputs=[],
338
+ outputs=[ input_audio]
339
+ )
340
+
341
+ manual_audio_upload.change(
342
+ fn=load_audio,
343
+ inputs=[manual_audio_upload],
344
+ outputs=[ input_audio]
345
+ )
346
+
347
+ # mask settings
348
+ with gr.Column():
349
+
350
+
351
+ presets = {
352
+ "unconditional": {
353
+ "periodic_p": 0,
354
+ "onset_mask_width": 0,
355
+ "beat_mask_width": 0,
356
+ "beat_mask_downbeats": False,
357
+ },
358
+ "slight periodic variation": {
359
+ "periodic_p": 5,
360
+ "onset_mask_width": 5,
361
+ "beat_mask_width": 0,
362
+ "beat_mask_downbeats": False,
363
+ },
364
+ "moderate periodic variation": {
365
+ "periodic_p": 13,
366
+ "onset_mask_width": 5,
367
+ "beat_mask_width": 0,
368
+ "beat_mask_downbeats": False,
369
+ },
370
+ "strong periodic variation": {
371
+ "periodic_p": 17,
372
+ "onset_mask_width": 5,
373
+ "beat_mask_width": 0,
374
+ "beat_mask_downbeats": False,
375
+ },
376
+ "very strong periodic variation": {
377
+ "periodic_p": 21,
378
+ "onset_mask_width": 5,
379
+ "beat_mask_width": 0,
380
+ "beat_mask_downbeats": False,
381
+ },
382
+ "beat-driven variation": {
383
+ "periodic_p": 0,
384
+ "onset_mask_width": 0,
385
+ "beat_mask_width": 50,
386
+ "beat_mask_downbeats": False,
387
+ },
388
+ "beat-driven variation (downbeats only)": {
389
+ "periodic_p": 0,
390
+ "onset_mask_width": 0,
391
+ "beat_mask_width": 50,
392
+ "beat_mask_downbeats": True,
393
+ },
394
+ "beat-driven variation (downbeats only, strong)": {
395
+ "periodic_p": 0,
396
+ "onset_mask_width": 0,
397
+ "beat_mask_width": 20,
398
+ "beat_mask_downbeats": True,
399
+ },
400
+ }
401
+
402
+ preset = gr.Dropdown(
403
+ label="preset",
404
+ choices=list(presets.keys()),
405
+ value="strong periodic variation",
406
+ )
407
+ load_preset_button = gr.Button("load_preset")
408
+
409
+ with gr.Accordion("manual controls", open=True):
410
+ periodic_p = gr.Slider(
411
+ label="periodic prompt (0 - unconditional, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
412
+ minimum=0,
413
+ maximum=128,
414
+ step=1,
415
+ value=3,
416
+ )
417
+
418
+
419
+ onset_mask_width = gr.Slider(
420
+ label="onset mask width (multiplies with the periodic mask, 1 step ~= 10milliseconds) ",
421
+ minimum=0,
422
+ maximum=100,
423
+ step=1,
424
+ value=5,
425
+ )
426
+
427
+ beat_mask_width = gr.Slider(
428
+ label="beat prompt (ms)",
429
+ minimum=0,
430
+ maximum=200,
431
+ value=0,
432
+ )
433
+ beat_mask_downbeats = gr.Checkbox(
434
+ label="beat mask downbeats only?",
435
+ value=False
436
+ )
437
+
438
+ n_mask_codebooks = gr.Number(
439
+ label="first upper codebook level to mask",
440
+ value=9,
441
+ )
442
+
443
+
444
+ with gr.Accordion("extras ", open=False):
445
+ pitch_shift_amt = gr.Slider(
446
+ label="pitch shift amount (semitones)",
447
+ minimum=-12,
448
+ maximum=12,
449
+ step=1,
450
+ value=0,
451
+ )
452
+
453
+ rand_mask_intensity = gr.Slider(
454
+ label="random mask intensity. (If this is less than 1, scatters prompts throughout the audio, should be between 0.9 and 1.0)",
455
+ minimum=0.0,
456
+ maximum=1.0,
457
+ value=1.0
458
+ )
459
+
460
+ periodic_w = gr.Slider(
461
+ label="periodic prompt width (steps, 1 step ~= 10milliseconds)",
462
+ minimum=1,
463
+ maximum=20,
464
+ step=1,
465
+ value=1,
466
+ )
467
+ n_conditioning_codebooks = gr.Number(
468
+ label="number of conditioning codebooks. probably 0",
469
+ value=0,
470
+ precision=0,
471
+ )
472
+
473
+ stretch_factor = gr.Slider(
474
+ label="time stretch factor",
475
+ minimum=0,
476
+ maximum=64,
477
+ step=1,
478
+ value=1,
479
+ )
480
+
481
+ preset_outputs = {
482
+ periodic_p,
483
+ onset_mask_width,
484
+ beat_mask_width,
485
+ beat_mask_downbeats,
486
+ }
487
+
488
+ def load_preset(_preset):
489
+ return tuple(presets[_preset].values())
490
+
491
+ load_preset_button.click(
492
+ fn=load_preset,
493
+ inputs=[preset],
494
+ outputs=preset_outputs
495
+ )
496
+
497
+
498
+ with gr.Accordion("prefix/suffix prompts", open=False):
499
+ prefix_s = gr.Slider(
500
+ label="prefix hint length (seconds)",
501
+ minimum=0.0,
502
+ maximum=10.0,
503
+ value=0.0
504
+ )
505
+ suffix_s = gr.Slider(
506
+ label="suffix hint length (seconds)",
507
+ minimum=0.0,
508
+ maximum=10.0,
509
+ value=0.0
510
+ )
511
+
512
+ masktemp = gr.Slider(
513
+ label="mask temperature",
514
+ minimum=0.0,
515
+ maximum=100.0,
516
+ value=1.5
517
+ )
518
+ sampletemp = gr.Slider(
519
+ label="sample temperature",
520
+ minimum=0.1,
521
+ maximum=10.0,
522
+ value=1.0,
523
+ step=0.001
524
+ )
525
+
526
+
527
+
528
+ with gr.Accordion("sampling settings", open=False):
529
+ top_p = gr.Slider(
530
+ label="top p (0.0 = off)",
531
+ minimum=0.0,
532
+ maximum=1.0,
533
+ value=0.0
534
+ )
535
+ typical_filtering = gr.Checkbox(
536
+ label="typical filtering ",
537
+ value=False
538
+ )
539
+ typical_mass = gr.Slider(
540
+ label="typical mass (should probably stay between 0.1 and 0.5)",
541
+ minimum=0.01,
542
+ maximum=0.99,
543
+ value=0.15
544
+ )
545
+ typical_min_tokens = gr.Slider(
546
+ label="typical min tokens (should probably stay between 1 and 256)",
547
+ minimum=1,
548
+ maximum=256,
549
+ step=1,
550
+ value=64
551
+ )
552
+ sample_cutoff = gr.Slider(
553
+ label="sample cutoff",
554
+ minimum=0.0,
555
+ maximum=1.0,
556
+ value=0.5,
557
+ step=0.01
558
+ )
559
+
560
+ use_coarse2fine = gr.Checkbox(
561
+ label="use coarse2fine",
562
+ value=True,
563
+ visible=False
564
+ )
565
+
566
+ num_steps = gr.Slider(
567
+ label="number of steps (should normally be between 12 and 36)",
568
+ minimum=1,
569
+ maximum=128,
570
+ step=1,
571
+ value=36
572
+ )
573
+
574
+ dropout = gr.Slider(
575
+ label="mask dropout",
576
+ minimum=0.0,
577
+ maximum=1.0,
578
+ step=0.01,
579
+ value=0.0
580
+ )
581
+
582
+
583
+ seed = gr.Number(
584
+ label="seed (0 for random)",
585
+ value=0,
586
+ precision=0,
587
+ )
588
+
589
+
590
+
591
+ # mask settings
592
+ with gr.Column():
593
+
594
+ model_choice = gr.Dropdown(
595
+ label="model choice",
596
+ choices=list(MODEL_CHOICES.keys()),
597
+ value="default",
598
+ visible=True
599
+ )
600
+
601
+ vamp_button = gr.Button("generate (vamp)!!!")
602
+ output_audio = gr.Audio(
603
+ label="output audio",
604
+ interactive=False,
605
+ type="filepath"
606
+ )
607
+
608
+ notes_text = gr.Textbox(
609
+ label="type any notes about the generated audio here",
610
+ value="",
611
+ interactive=True
612
+ )
613
+ save_button = gr.Button("save vamp")
614
+ download_file = gr.File(
615
+ label="vamp to download will appear here",
616
+ interactive=False
617
+ )
618
+ use_as_input_button = gr.Button("use output as input")
619
+
620
+ thank_you = gr.Markdown("")
621
+
622
+
623
+ _inputs = {
624
+ input_audio,
625
+ num_steps,
626
+ masktemp,
627
+ sampletemp,
628
+ top_p,
629
+ prefix_s, suffix_s,
630
+ rand_mask_intensity,
631
+ periodic_p, periodic_w,
632
+ n_conditioning_codebooks,
633
+ dropout,
634
+ use_coarse2fine,
635
+ stretch_factor,
636
+ onset_mask_width,
637
+ typical_filtering,
638
+ typical_mass,
639
+ typical_min_tokens,
640
+ beat_mask_width,
641
+ beat_mask_downbeats,
642
+ seed,
643
+ model_choice,
644
+ n_mask_codebooks,
645
+ pitch_shift_amt,
646
+ sample_cutoff
647
+ }
648
+
649
+ # connect widgets
650
+ vamp_button.click(
651
+ fn=vamp,
652
+ inputs=_inputs,
653
+ outputs=[output_audio, audio_mask],
654
+ )
655
+
656
+ api_vamp_button = gr.Button("api vamp", visible=False)
657
+ api_vamp_button.click(
658
+ fn=api_vamp,
659
+ inputs=_inputs,
660
+ outputs=[output_audio],
661
+ api_name="vamp"
662
+ )
663
+
664
+ use_as_input_button.click(
665
+ fn=lambda x: x,
666
+ inputs=[output_audio],
667
+ outputs=[input_audio]
668
+ )
669
+
670
+ save_button.click(
671
+ fn=save_vamp,
672
+ inputs=_inputs | {notes_text, output_audio},
673
+ outputs=[thank_you, download_file]
674
+ )
675
+
676
+
677
+ demo.launch(share=True, debug=True)
678
+ demo.queue()
assets/example.wav ADDED
Binary file (883 kB). View file
 
conf/c2f.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/vampnet.yml
3
+
4
+ VampNet.n_codebooks: 14
5
+ VampNet.n_conditioning_codebooks: 4
6
+
7
+ VampNet.embedding_dim: 1280
8
+ VampNet.n_layers: 16
9
+ VampNet.n_heads: 20
10
+
11
+ AudioDataset.duration: 3.0
12
+
13
+
14
+ AudioDataset.loudness_cutoff: -40.0
conf/generated/bbc-humans/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/bbc-humans/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /home/hugo/Humans/
15
+ val/AudioLoader.sources: *id001
conf/generated/bbc-humans/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/bbc-humans/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /home/hugo/Humans/
8
+ val/AudioLoader.sources: *id001
conf/generated/bbc-humans/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /home/hugo/Humans/
3
+ Interface.coarse2fine_ckpt: ./runs/bbc-humans/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/bbc-humans/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/boleros/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/boleros/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/loras/boleros
15
+ val/AudioLoader.sources: *id001
conf/generated/boleros/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/boleros/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/loras/boleros
8
+ val/AudioLoader.sources: *id001
conf/generated/boleros/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/loras/boleros
3
+ Interface.coarse2fine_ckpt: ./runs/boleros/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/boleros/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/bowl/c2f.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/bowl/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/seagate_prosound/prosound_core_complete/Anns
15
+ - Animals
16
+ val/AudioLoader.sources: *id001
conf/generated/bowl/coarse.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/bowl/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/seagate_prosound/prosound_core_complete/Anns
8
+ - Animals
9
+ val/AudioLoader.sources: *id001
conf/generated/bowl/interface.yml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/seagate_prosound/prosound_core_complete/Anns
3
+ - Animals
4
+ Interface.coarse2fine_ckpt: ./runs/bowl/c2f/latest/vampnet/weights.pth
5
+ Interface.coarse_ckpt: ./runs/bowl/coarse/latest/vampnet/weights.pth
6
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
7
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/breaks-steps/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/machines
3
+ Interface.coarse2fine_ckpt: ./runs-june-23/breaks-steps/c2f/best/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs-june-23/breaks-steps/c2f/best/lora.pth
5
+ Interface.coarse_ckpt: ./runs-june-23/breaks-steps/coarse/best/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs-june-23/breaks-steps/coarse/best/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/choir/interface.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/knower
3
+ Interface.coarse2fine_ckpt: ./runs/choir/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/choir/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/choir/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/choir/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
9
+ Interface.coarse_chunk_size_s: 15
conf/generated/earlymachines/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/machines/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/machines
15
+ val/AudioLoader.sources: *id001
conf/generated/earlymachines/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/machines/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/machines
8
+ val/AudioLoader.sources: *id001
conf/generated/earlymachines/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/machines
3
+ Interface.coarse2fine_ckpt: ./runs/machines/c2f/best/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/machines/c2f/best/lora.pth
5
+ Interface.coarse_ckpt: ./runs/machines/coarse/best/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/machines/coarse/best/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/funk/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/knower/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/knower
15
+ val/AudioLoader.sources: *id001
conf/generated/funk/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/knower/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/knower
8
+ val/AudioLoader.sources: *id001
conf/generated/funk/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/knower
3
+ Interface.coarse2fine_ckpt: ./runs/knower/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/knower/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/knower/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/knower/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/ismir-birds/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/ismir-birds/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/ismir-23-cfm/xeno-canto-augmented-filtered
15
+ val/AudioLoader.sources: *id001
conf/generated/ismir-birds/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/ismir-birds/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/ismir-23-cfm/xeno-canto-augmented-filtered
8
+ val/AudioLoader.sources: *id001
conf/generated/ismir-birds/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/ismir-23-cfm/xeno-canto-augmented-filtered
3
+ Interface.coarse2fine_ckpt: ./runs/ismir-birds/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/ismir-birds/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/ismir-birds/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/ismir-birds/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/ismir-machines/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/ismir-machines/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/ismir-23-cfm/machines-augmented-filtered
15
+ val/AudioLoader.sources: *id001
conf/generated/ismir-machines/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/ismir-machines/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/ismir-23-cfm/machines-augmented-filtered
8
+ val/AudioLoader.sources: *id001
conf/generated/ismir-machines/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/ismir-23-cfm/machines-augmented-filtered
3
+ Interface.coarse2fine_ckpt: ./runs/ismir-machines/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/ismir-machines/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/ismir-machines/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/ismir-machines/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/machines/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/machines/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/machines
15
+ val/AudioLoader.sources: *id001
conf/generated/machines/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/machines/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/machines
8
+ val/AudioLoader.sources: *id001
conf/generated/machines/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/machines
3
+ Interface.coarse2fine_ckpt: ./runs/machines/c2f/best/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/machines/c2f/best/lora.pth
5
+ Interface.coarse_ckpt: ./runs/machines/coarse/best/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/machines/coarse/best/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/musdb/c2f.yml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/xeno-canto/c2f
13
+
14
+ AudioDataset.aligned: true
15
+ train/build_dataset.folders:
16
+ bass:
17
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
18
+ ext: "bass.wav"
19
+ drums:
20
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
21
+ ext: "drums.wav"
22
+ other:
23
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
24
+ ext: "other.wav"
25
+ vocals:
26
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
27
+ ext: "vocals.wav"
28
+ val/build_dataset.folders:
29
+ bass:
30
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
31
+ ext: "bass.wav"
32
+ drums:
33
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
34
+ ext: "drums.wav"
35
+ other:
36
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
37
+ ext: "other.wav"
38
+ vocals:
39
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
40
+ ext: "vocals.wav"
conf/generated/musdb/coarse.yml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/xeno-canto/coarse
6
+ train/build_dataset.folders:
7
+ bass:
8
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
9
+ ext: ["bass.wav"]
10
+ drums:
11
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
12
+ ext: ["drums.wav"]
13
+ other:
14
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
15
+ ext: ["other.wav"]
16
+ vocals:
17
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
18
+ ext: ["vocals.wav"]
19
+ val/build_dataset.folders:
20
+ bass:
21
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
22
+ ext: ["bass.wav"]
23
+ drums:
24
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
25
+ ext: ["drums.wav"]
26
+ other:
27
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
28
+ ext: ["other.wav"]
29
+ vocals:
30
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
31
+ ext: ["vocals.wav"]
conf/generated/musdb/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Interface.coarse2fine_ckpt: ./models/vampnet/c2f.pth
2
+ Interface.coarse2fine_lora_ckpt: null
3
+
4
+ Interface.coarse_ckpt: ./runs/musdb-cond-clfdrop/best/vampnet/weights.pth
5
+ Interface.coarse_lora_ckpt: null
6
+
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/n64/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/n64/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - data/salad-bowl/n64-jungle/n64-jungle-mix.wav
15
+ val/AudioLoader.sources: *id001
conf/generated/n64/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/n64/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - data/salad-bowl/n64-jungle/n64-jungle-mix.wav
8
+ val/AudioLoader.sources: *id001
conf/generated/n64/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - data/salad-bowl/n64-jungle/n64-jungle-mix.wav
3
+ Interface.coarse2fine_ckpt: ./runs/n64/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/n64/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/natural-sounds/c2f.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/soundrangers-v2/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK2/prosound_redacted/Soundrangers Complete
15
+ - /media/CHONK2/prosound_redacted/Soundrangers Update 2018
16
+ - /media/CHONK2/prosound_redacted/BBC Nature Sound Effects Library/Animals
17
+ - /media/CHONK2/prosound_redacted/BBC Nature Sound Effects Library/Birds
18
+ - /media/CHONK2/prosound_redacted/BBC Historical and 1-166 Sound Effects Library/Foley
19
+ - /media/CHONK2/prosound_redacted/BBC Historical and 1-166 Sound Effects Library/Musical
20
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Dogs
21
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Farm
22
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Horses
23
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Rodents
24
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Wild
25
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Bells
26
+ - /media/CHONK2/prosound_redacted/King Collection - Volume 1/Musical - Chimes
27
+ - /media/CHONK2/prosound_redacted/King Collection - Volume 1/Musical - Instruments
28
+ val/AudioLoader.sources: *id001
conf/generated/natural-sounds/coarse.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/soundrangers-v2/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK2/prosound_redacted/Soundrangers Complete
8
+ - /media/CHONK2/prosound_redacted/Soundrangers Update 2018
9
+ - /media/CHONK2/prosound_redacted/BBC Nature Sound Effects Library/Animals
10
+ - /media/CHONK2/prosound_redacted/BBC Nature Sound Effects Library/Birds
11
+ - /media/CHONK2/prosound_redacted/BBC Historical and 1-166 Sound Effects Library/Foley
12
+ - /media/CHONK2/prosound_redacted/BBC Historical and 1-166 Sound Effects Library/Musical
13
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Dogs
14
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Farm
15
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Horses
16
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Rodents
17
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Wild
18
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Bells
19
+ - /media/CHONK2/prosound_redacted/King Collection - Volume 1/Musical - Chimes
20
+ - /media/CHONK2/prosound_redacted/King Collection - Volume 1/Musical - Instruments
21
+ val/AudioLoader.sources: *id001
conf/generated/natural-sounds/interface.yml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK2/prosound_redacted/Soundrangers
3
+ - Complete
4
+ Interface.coarse2fine_ckpt: ./runs/soundrangers-v2/c2f/latest/vampnet/weights.pth
5
+ Interface.coarse_ckpt: ./runs/soundrangers-v2/coarse/latest/vampnet/weights.pth
6
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
7
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/nes/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/nes/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - data/salad-bowl/chiptune/nes.wav
15
+ val/AudioLoader.sources: *id001
conf/generated/nes/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/nes/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - data/salad-bowl/chiptune/nes.wav
8
+ val/AudioLoader.sources: *id001
conf/generated/nes/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - data/salad-bowl/chiptune/nes.wav
3
+ Interface.coarse2fine_ckpt: ./runs/nes/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/nes/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/nyc-subway/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/nyc-subway/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/nyc-subway
15
+ val/AudioLoader.sources: *id001
conf/generated/nyc-subway/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/nyc-subway/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/nyc-subway
8
+ val/AudioLoader.sources: *id001
conf/generated/nyc-subway/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - /media/CHONK/hugo/nyc-subway
3
+ Interface.coarse2fine_ckpt: ./runs/nyc-subway/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/nyc-subway/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/nyc-subway/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/nyc-subway/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/ocean-waves/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/ocean-waves/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/ocean-waves-sounds
15
+ val/AudioLoader.sources: *id001
conf/generated/ocean-waves/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/ocean-waves/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/ocean-waves-sounds
8
+ val/AudioLoader.sources: *id001
conf/generated/ocean-waves/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - /media/CHONK/hugo/ocean-waves-sounds.mp3
3
+ Interface.coarse2fine_ckpt: ./runs/ocean-waves/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/ocean-waves/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/ocean-waves/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/ocean-waves/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth