Hugo Flores Garcia committed on
Commit
bccd2ea
1 Parent(s): 3419098
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +190 -0
  2. LICENSE +21 -0
  3. README.md +113 -6
  4. app.py +678 -0
  5. assets/example.wav +0 -0
  6. conf/c2f.yml +14 -0
  7. conf/generated/bbc-humans/c2f.yml +15 -0
  8. conf/generated/bbc-humans/coarse.yml +8 -0
  9. conf/generated/bbc-humans/interface.yml +6 -0
  10. conf/generated/boleros/c2f.yml +15 -0
  11. conf/generated/boleros/coarse.yml +8 -0
  12. conf/generated/boleros/interface.yml +6 -0
  13. conf/generated/bowl/c2f.yml +16 -0
  14. conf/generated/bowl/coarse.yml +9 -0
  15. conf/generated/bowl/interface.yml +7 -0
  16. conf/generated/breaks-steps/interface.yml +8 -0
  17. conf/generated/choir/interface.yml +9 -0
  18. conf/generated/earlymachines/c2f.yml +15 -0
  19. conf/generated/earlymachines/coarse.yml +8 -0
  20. conf/generated/earlymachines/interface.yml +8 -0
  21. conf/generated/funk/c2f.yml +15 -0
  22. conf/generated/funk/coarse.yml +8 -0
  23. conf/generated/funk/interface.yml +8 -0
  24. conf/generated/ismir-birds/c2f.yml +15 -0
  25. conf/generated/ismir-birds/coarse.yml +8 -0
  26. conf/generated/ismir-birds/interface.yml +8 -0
  27. conf/generated/ismir-machines/c2f.yml +15 -0
  28. conf/generated/ismir-machines/coarse.yml +8 -0
  29. conf/generated/ismir-machines/interface.yml +8 -0
  30. conf/generated/machines/c2f.yml +15 -0
  31. conf/generated/machines/coarse.yml +8 -0
  32. conf/generated/machines/interface.yml +8 -0
  33. conf/generated/musdb/c2f.yml +40 -0
  34. conf/generated/musdb/coarse.yml +31 -0
  35. conf/generated/musdb/interface.yml +8 -0
  36. conf/generated/n64/c2f.yml +15 -0
  37. conf/generated/n64/coarse.yml +8 -0
  38. conf/generated/n64/interface.yml +6 -0
  39. conf/generated/natural-sounds/c2f.yml +28 -0
  40. conf/generated/natural-sounds/coarse.yml +21 -0
  41. conf/generated/natural-sounds/interface.yml +7 -0
  42. conf/generated/nes/c2f.yml +15 -0
  43. conf/generated/nes/coarse.yml +8 -0
  44. conf/generated/nes/interface.yml +6 -0
  45. conf/generated/nyc-subway/c2f.yml +15 -0
  46. conf/generated/nyc-subway/coarse.yml +8 -0
  47. conf/generated/nyc-subway/interface.yml +8 -0
  48. conf/generated/ocean-waves/c2f.yml +15 -0
  49. conf/generated/ocean-waves/coarse.yml +8 -0
  50. conf/generated/ocean-waves/interface.yml +8 -0
.gitignore ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/env.sh
108
+ venv/
109
+ env.bak/
110
+ venv.bak/
111
+
112
+ # Spyder project settings
113
+ .spyderproject
114
+ .spyproject
115
+
116
+ # Rope project settings
117
+ .ropeproject
118
+
119
+ # mkdocs documentation
120
+ /site
121
+
122
+ # mypy
123
+ .mypy_cache/
124
+ .dmypy.json
125
+ dmypy.json
126
+
127
+ # Pyre type checker
128
+ .pyre/
129
+
130
+ # Files created by experiments
131
+ output/
132
+ snapshot/
133
+ *.m4a
134
+ notebooks/scratch.ipynb
135
+ notebooks/inspect.ipynb
136
+ notebooks/effects.ipynb
137
+ notebooks/*.ipynb
138
+ notebooks/*.gif
139
+ notebooks/*.wav
140
+ notebooks/*.mp4
141
+ *runs/
142
+ boards/
143
+ samples/
144
+ *.ipynb
145
+
146
+ results.json
147
+ metrics.csv
148
+ mprofile_*
149
+ mem.png
150
+
151
+ results/
152
+ mprofile*
153
+ *.png
154
+ # do not ignore the test wav file
155
+ !tests/audio/short_test_audio.wav
156
+ !tests/audio/output.wav
157
+ */.DS_Store
158
+ .DS_Store
159
+ env.sh
160
+ _codebraid/
161
+ **/*.html
162
+ **/*.exec.md
163
+ flagged/
164
+ log.txt
165
+ ckpt/
166
+ .syncthing*
167
+ tests/assets/
168
+ archived/
169
+
170
+ scratch/
171
+
172
+ runs-archive
173
+ lyrebird-audiotools
174
+ lyrebird-audio-codec
175
+ samples-*/**
176
+
177
+ gradio-outputs/
178
+ samples*/
179
+ models-all/
180
+ models.zip
181
+ .git-old
182
+
183
+
184
+
185
+ gtzan.zip
186
+ .gtzan_emb_cache
187
+
188
+
189
+ data/
190
+ data
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Hugo Flores García and Prem Seetharaman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,120 @@
1
  ---
2
  title: Salad Bowl
3
- emoji:
4
- colorFrom: pink
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.12.0
8
  app_file: app.py
9
  pinned: false
10
- license: cc-by-nc-sa-4.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Salad Bowl
3
+ emoji: 🥗
4
+ colorFrom: yellow
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 4.11.0
8
  app_file: app.py
9
  pinned: false
10
+ license: cc-by-nc-4.0
11
  ---
12
 
13
+ # VampNet
14
+
15
+ This repository contains recipes for training generative music models on top of the Descript Audio Codec.
16
+
17
+ ## try `unloop`
18
+ you can try vampnet in a co-creative looper called unloop. see this link: https://github.com/hugofloresgarcia/unloop
19
+
20
+ # Setting up
21
+
22
+ **Requires Python 3.9**.
23
+
24
+ you'll need a Python 3.9 environment to run VampNet. This is due to a [known issue with madmom](https://github.com/hugofloresgarcia/vampnet/issues/15).
25
+
26
+ (for example, using conda)
27
+ ```bash
28
+ conda create -n vampnet python=3.9
29
+ conda activate vampnet
30
+ ```
31
+
32
+
33
+ install VampNet
34
+
35
+ ```bash
36
+ git clone https://github.com/hugofloresgarcia/vampnet.git
37
+ pip install -e ./vampnet
38
+ ```
39
+
40
+ ## A note on argbind
41
+ This repository relies on [argbind](https://github.com/pseeth/argbind) to manage CLIs and config files.
42
+ Config files are stored in the `conf/` folder.
43
+
44
+ ## Getting the Pretrained Models
45
+
46
+ ### Licensing for Pretrained Models:
47
+ The weights for the models are licensed [`CC BY-NC-SA 4.0`](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.ml). Likewise, any VampNet models fine-tuned on the pretrained models are also licensed [`CC BY-NC-SA 4.0`](https://creativecommons.org/licenses/by-nc-sa/4.0/deed.ml).
48
+
49
+ Download the pretrained models from [this link](https://zenodo.org/record/8136629). Then, extract the models to the `models/` folder.
50
+
51
+
52
+ # Usage
53
+
54
+ ## Launching the Gradio Interface
55
+ You can launch a gradio UI to play with vampnet.
56
+
57
+ ```bash
58
+ python app.py --args.load conf/interface.yml --Interface.device cuda
59
+ ```
60
+
61
+ # Training / Fine-tuning
62
+
63
+ ## Training a model
64
+
65
+ To train a model, run the following script:
66
+
67
+ ```bash
68
+ python scripts/exp/train.py --args.load conf/vampnet.yml --save_path /path/to/checkpoints
69
+ ```
70
+
71
+ for multi-gpu training, use torchrun:
72
+
73
+ ```bash
74
+ torchrun --nproc_per_node gpu scripts/exp/train.py --args.load conf/vampnet.yml --save_path path/to/ckpt
75
+ ```
76
+
77
+ You can edit `conf/vampnet.yml` to change the dataset paths or any training hyperparameters.
78
+
79
+ For coarse2fine models, you can use `conf/c2f.yml` as a starting configuration.
80
+
81
+ See `python scripts/exp/train.py -h` for a list of options.
82
+
83
+ ## Debugging training
84
+
85
+ To debug training, it's easier to debug with 1 gpu and 0 workers
86
+
87
+ ```bash
88
+ CUDA_VISIBLE_DEVICES=0 python -m pdb scripts/exp/train.py --args.load conf/vampnet.yml --save_path /path/to/checkpoints --num_workers 0
89
+ ```
90
+
91
+ ## Fine-tuning
92
+ To fine-tune a model, use the script in `scripts/exp/fine_tune.py` to generate 3 configuration files: `c2f.yml`, `coarse.yml`, and `interface.yml`.
93
+ The first two are used to fine-tune the coarse and fine models, respectively. The last one is used to launch the gradio interface.
94
+
95
+ ```bash
96
+ python scripts/exp/fine_tune.py "/path/to/audio1.mp3 /path/to/audio2/ /path/to/audio3.wav" <fine_tune_name>
97
+ ```
98
+
99
+ This will create a folder under `conf/<fine_tune_name>/` with the 3 configuration files.
100
+
101
+ The save_paths will be set to `runs/<fine_tune_name>/coarse` and `runs/<fine_tune_name>/c2f`.
102
+
103
+ launch the coarse job:
104
+ ```bash
105
+ python scripts/exp/train.py --args.load conf/generated/<fine_tune_name>/coarse.yml
106
+ ```
107
+
108
+ this will save the coarse model to `runs/<fine_tune_name>/coarse/ckpt/best/`.
109
+
110
+ launch the c2f job:
111
+ ```bash
112
+ python scripts/exp/train.py --args.load conf/generated/<fine_tune_name>/c2f.yml
113
+ ```
114
+
115
+ launch the interface:
116
+ ```bash
117
+ python app.py --args.load conf/generated/<fine_tune_name>/interface.yml
118
+ ```
119
+
120
+
app.py ADDED
@@ -0,0 +1,678 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import yaml
3
+ import uuid
4
+
5
+ import numpy as np
6
+ import audiotools as at
7
+ import argbind
8
+ import shutil
9
+ import torch
10
+
11
+ import gradio as gr
12
+ from vampnet.interface import Interface
13
+ from vampnet import mask as pmask
14
+
15
+ device = "cuda" if torch.cuda.is_available() else "cpu"
16
+
17
+ interface = Interface(
18
+ device=device,
19
+ coarse_ckpt="models/vampnet/coarse.pth",
20
+ coarse2fine_ckpt="models/vampnet/c2f.pth",
21
+ codec_ckpt="models/vampnet/codec.pth",
22
+ )
23
+
24
+ # populate the model choices with any interface.yml files in the generated confs
25
+ MODEL_CHOICES = {
26
+ "default": {
27
+ "Interface.coarse_ckpt": str(interface.coarse_path),
28
+ "Interface.coarse2fine_ckpt": str(interface.c2f_path),
29
+ "Interface.codec_ckpt": str(interface.codec_path),
30
+ }
31
+ }
32
+ generated_confs = Path("conf/generated")
33
+ for conf_file in generated_confs.glob("*/interface.yml"):
34
+ with open(conf_file) as f:
35
+ _conf = yaml.safe_load(f)
36
+ MODEL_CHOICES[conf_file.parent.name] = _conf
37
+
38
+
39
+
40
+ OUT_DIR = Path("gradio-outputs")
41
+ OUT_DIR.mkdir(exist_ok=True, parents=True)
42
+
43
+ def load_audio(file):
44
+ print(file)
45
+ filepath = file.name
46
+ sig = at.AudioSignal.salient_excerpt(
47
+ filepath,
48
+ duration=interface.coarse.chunk_size_s
49
+ )
50
+ sig = interface.preprocess(sig)
51
+
52
+ out_dir = OUT_DIR / "tmp" / str(uuid.uuid4())
53
+ out_dir.mkdir(parents=True, exist_ok=True)
54
+ sig.write(out_dir / "input.wav")
55
+ return sig.path_to_file
56
+
57
+
58
+ def load_example_audio():
59
+ return "./assets/example.wav"
60
+
61
+ from torch_pitch_shift import pitch_shift, get_fast_shifts
62
+ def shift_pitch(signal, interval: int):
63
+ signal.samples = pitch_shift(
64
+ signal.samples,
65
+ shift=interval,
66
+ sample_rate=signal.sample_rate
67
+ )
68
+ return signal
69
+
70
+ def _vamp(data, return_mask=False):
71
+
72
+ # remove any old files in the output directory (from previous runs)
73
+ shutil.rmtree(OUT_DIR)
74
+ OUT_DIR.mkdir()
75
+
76
+ out_dir = OUT_DIR / str(uuid.uuid4())
77
+ out_dir.mkdir()
78
+
79
+ sig = at.AudioSignal(data[input_audio])
80
+ sig = interface.preprocess(sig)
81
+
82
+
83
+ # reload the model if necessary
84
+ interface.reload(
85
+ coarse_ckpt=MODEL_CHOICES[data[model_choice]]["Interface.coarse_ckpt"],
86
+ c2f_ckpt=MODEL_CHOICES[data[model_choice]]["Interface.coarse2fine_ckpt"],
87
+ )
88
+
89
+ loudness = sig.loudness()
90
+ print(f"input loudness is {loudness}")
91
+
92
+ if data[pitch_shift_amt] != 0:
93
+ sig = shift_pitch(sig, data[pitch_shift_amt])
94
+
95
+ z = interface.encode(sig)
96
+
97
+ ncc = data[n_conditioning_codebooks]
98
+
99
+ # build the mask
100
+ mask = pmask.linear_random(z, data[rand_mask_intensity])
101
+ mask = pmask.mask_and(
102
+ mask, pmask.inpaint(
103
+ z,
104
+ interface.s2t(data[prefix_s]),
105
+ interface.s2t(data[suffix_s])
106
+ )
107
+ )
108
+ mask = pmask.mask_and(
109
+ mask, pmask.periodic_mask(
110
+ z,
111
+ data[periodic_p],
112
+ data[periodic_w],
113
+ random_roll=True
114
+ )
115
+ )
116
+ if data[onset_mask_width] > 0:
117
+ mask = pmask.mask_or(
118
+ mask, pmask.onset_mask(sig, z, interface, width=data[onset_mask_width])
119
+ )
120
+ if data[beat_mask_width] > 0:
121
+ beat_mask = interface.make_beat_mask(
122
+ sig,
123
+ after_beat_s=(data[beat_mask_width]/1000),
124
+ mask_upbeats=not data[beat_mask_downbeats],
125
+ )
126
+ mask = pmask.mask_and(mask, beat_mask)
127
+
128
+ # these should be the last two mask ops
129
+ mask = pmask.dropout(mask, data[dropout])
130
+ mask = pmask.codebook_unmask(mask, ncc)
131
+ mask = pmask.codebook_mask(mask, int(data[n_mask_codebooks]))
132
+
133
+ print(f"dropout {data[dropout]}")
134
+ print(f"masktemp {data[masktemp]}")
135
+ print(f"sampletemp {data[sampletemp]}")
136
+ print(f"top_p {data[top_p]}")
137
+ print(f"prefix_s {data[prefix_s]}")
138
+ print(f"suffix_s {data[suffix_s]}")
139
+ print(f"rand_mask_intensity {data[rand_mask_intensity]}")
140
+ print(f"num_steps {data[num_steps]}")
141
+ print(f"periodic_p {data[periodic_p]}")
142
+ print(f"periodic_w {data[periodic_w]}")
143
+ print(f"n_conditioning_codebooks {data[n_conditioning_codebooks]}")
144
+ print(f"use_coarse2fine {data[use_coarse2fine]}")
145
+ print(f"onset_mask_width {data[onset_mask_width]}")
146
+ print(f"beat_mask_width {data[beat_mask_width]}")
147
+ print(f"beat_mask_downbeats {data[beat_mask_downbeats]}")
148
+ print(f"stretch_factor {data[stretch_factor]}")
149
+ print(f"seed {data[seed]}")
150
+ print(f"pitch_shift_amt {data[pitch_shift_amt]}")
151
+ print(f"sample_cutoff {data[sample_cutoff]}")
152
+
153
+
154
+ _top_p = data[top_p] if data[top_p] > 0 else None
155
+ # save the mask as a txt file
156
+ np.savetxt(out_dir / "mask.txt", mask[:,0,:].long().cpu().numpy())
157
+
158
+ _seed = data[seed] if data[seed] > 0 else None
159
+ print(f"processing coarse...")
160
+ zv, mask_z = interface.coarse_vamp(
161
+ z,
162
+ mask=mask,
163
+ sampling_steps=data[num_steps],
164
+ mask_temperature=data[masktemp]*10,
165
+ sampling_temperature=data[sampletemp],
166
+ return_mask=True,
167
+ typical_filtering=data[typical_filtering],
168
+ typical_mass=data[typical_mass],
169
+ typical_min_tokens=data[typical_min_tokens],
170
+ top_p=_top_p,
171
+ gen_fn=interface.coarse.generate,
172
+ seed=_seed,
173
+ sample_cutoff=data[sample_cutoff],
174
+ )
175
+
176
+ if use_coarse2fine:
177
+ print(f"processing coarse to fine...")
178
+ zv = interface.coarse_to_fine(
179
+ zv,
180
+ mask_temperature=data[masktemp]*10,
181
+ sampling_temperature=data[sampletemp],
182
+ mask=mask,
183
+ sampling_steps=data[num_steps] // 2,
184
+ sample_cutoff=data[sample_cutoff],
185
+ seed=_seed,
186
+ )
187
+
188
+ sig = interface.to_signal(zv).cpu()
189
+ print("done")
190
+
191
+ print(f"output loudness is {sig.loudness()}")
192
+ sig = sig.normalize(loudness)
193
+ print(f"normalized loudness is {sig.loudness()}")
194
+ print("\n")
195
+
196
+ sig.write(out_dir / "output.wav")
197
+
198
+ if return_mask:
199
+ mask = interface.to_signal(mask_z).cpu()
200
+ mask.write(out_dir / "mask.wav")
201
+ return sig.path_to_file, mask.path_to_file
202
+ else:
203
+ return sig.path_to_file
204
+
205
+ def vamp(data):
206
+ return _vamp(data, return_mask=True)
207
+
208
+ def api_vamp(data):
209
+ return _vamp(data, return_mask=False)
210
+
211
+ def save_vamp(data):
212
+ out_dir = OUT_DIR / "saved" / str(uuid.uuid4())
213
+ out_dir.mkdir(parents=True, exist_ok=True)
214
+
215
+ sig_in = at.AudioSignal(data[input_audio])
216
+ sig_out = at.AudioSignal(data[output_audio])
217
+
218
+ sig_in.write(out_dir / "input.wav")
219
+ sig_out.write(out_dir / "output.wav")
220
+
221
+ _data = {
222
+ "masktemp": data[masktemp],
223
+ "sampletemp": data[sampletemp],
224
+ "top_p": data[top_p],
225
+ "prefix_s": data[prefix_s],
226
+ "suffix_s": data[suffix_s],
227
+ "rand_mask_intensity": data[rand_mask_intensity],
228
+ "num_steps": data[num_steps],
229
+ "notes": data[notes_text],
230
+ "periodic_period": data[periodic_p],
231
+ "periodic_width": data[periodic_w],
232
+ "n_conditioning_codebooks": data[n_conditioning_codebooks],
233
+ "use_coarse2fine": data[use_coarse2fine],
234
+ "stretch_factor": data[stretch_factor],
235
+ "seed": data[seed],
236
+ "samplecutoff": data[sample_cutoff],
237
+ }
238
+
239
+ # save with yaml
240
+ with open(out_dir / "data.yaml", "w") as f:
241
+ yaml.dump(_data, f)
242
+
243
+ import zipfile
244
+ zip_path = str(out_dir.with_suffix(".zip"))
245
+ with zipfile.ZipFile(zip_path, "w") as zf:
246
+ for file in out_dir.iterdir():
247
+ zf.write(file, file.name)
248
+
249
+ return f"saved! your save code is {out_dir.stem}", zip_path
250
+
251
+
252
+ def harp_vamp(_input_audio, _beat_mask_width, _sampletemp):
253
+
254
+ out_dir = OUT_DIR / str(uuid.uuid4())
255
+ out_dir.mkdir()
256
+ sig = at.AudioSignal(_input_audio)
257
+ sig = interface.preprocess(sig)
258
+
259
+ z = interface.encode(sig)
260
+
261
+ # build the mask
262
+ mask = pmask.linear_random(z, 1.0)
263
+ if _beat_mask_width > 0:
264
+ beat_mask = interface.make_beat_mask(
265
+ sig,
266
+ after_beat_s=(_beat_mask_width/1000),
267
+ )
268
+ mask = pmask.mask_and(mask, beat_mask)
269
+
270
+ # save the mask as a txt file
271
+ zv, mask_z = interface.coarse_vamp(
272
+ z,
273
+ mask=mask,
274
+ sampling_temperature=_sampletemp,
275
+ return_mask=True,
276
+ gen_fn=interface.coarse.generate,
277
+ )
278
+
279
+
280
+ zv = interface.coarse_to_fine(
281
+ zv,
282
+ sampling_temperature=_sampletemp,
283
+ mask=mask,
284
+ )
285
+
286
+ sig = interface.to_signal(zv).cpu()
287
+ print("done")
288
+
289
+ sig.write(out_dir / "output.wav")
290
+
291
+ return sig.path_to_file
292
+
293
+ with gr.Blocks() as demo:
294
+
295
+ with gr.Row():
296
+ with gr.Column():
297
+ gr.Markdown("# VampNet Audio Vamping")
298
+ gr.Markdown("""## Description:
299
+ This is a demo of the VampNet, a generative audio model that transforms the input audio based on the chosen settings.
300
+ You can control the extent and nature of variation with a set of manual controls and presets.
301
+ Use this interface to experiment with different mask settings and explore the audio outputs.
302
+ """)
303
+
304
+ gr.Markdown("""
305
+ ## Instructions:
306
+ 1. You can start by uploading some audio, or by loading the example audio.
307
+ 2. Choose a preset for the vamp operation, or manually adjust the controls to customize the mask settings.
308
+ 3. Click the "generate (vamp)!!!" button to apply the vamp operation. Listen to the output audio.
309
+ 4. Optionally, you can add some notes and save the result.
310
+ 5. You can also use the output as the new input and continue experimenting!
311
+ """)
312
+ with gr.Row():
313
+ with gr.Column():
314
+
315
+
316
+ manual_audio_upload = gr.File(
317
+ label=f"upload some audio (will be randomly trimmed to max of {interface.coarse.chunk_size_s:.2f}s)",
318
+ file_types=["audio"]
319
+ )
320
+ load_example_audio_button = gr.Button("or load example audio")
321
+
322
+ input_audio = gr.Audio(
323
+ label="input audio",
324
+ interactive=False,
325
+ type="filepath",
326
+ )
327
+
328
+ audio_mask = gr.Audio(
329
+ label="audio mask (listen to this to hear the mask hints)",
330
+ interactive=False,
331
+ type="filepath",
332
+ )
333
+
334
+ # connect widgets
335
+ load_example_audio_button.click(
336
+ fn=load_example_audio,
337
+ inputs=[],
338
+ outputs=[ input_audio]
339
+ )
340
+
341
+ manual_audio_upload.change(
342
+ fn=load_audio,
343
+ inputs=[manual_audio_upload],
344
+ outputs=[ input_audio]
345
+ )
346
+
347
+ # mask settings
348
+ with gr.Column():
349
+
350
+
351
+ presets = {
352
+ "unconditional": {
353
+ "periodic_p": 0,
354
+ "onset_mask_width": 0,
355
+ "beat_mask_width": 0,
356
+ "beat_mask_downbeats": False,
357
+ },
358
+ "slight periodic variation": {
359
+ "periodic_p": 5,
360
+ "onset_mask_width": 5,
361
+ "beat_mask_width": 0,
362
+ "beat_mask_downbeats": False,
363
+ },
364
+ "moderate periodic variation": {
365
+ "periodic_p": 13,
366
+ "onset_mask_width": 5,
367
+ "beat_mask_width": 0,
368
+ "beat_mask_downbeats": False,
369
+ },
370
+ "strong periodic variation": {
371
+ "periodic_p": 17,
372
+ "onset_mask_width": 5,
373
+ "beat_mask_width": 0,
374
+ "beat_mask_downbeats": False,
375
+ },
376
+ "very strong periodic variation": {
377
+ "periodic_p": 21,
378
+ "onset_mask_width": 5,
379
+ "beat_mask_width": 0,
380
+ "beat_mask_downbeats": False,
381
+ },
382
+ "beat-driven variation": {
383
+ "periodic_p": 0,
384
+ "onset_mask_width": 0,
385
+ "beat_mask_width": 50,
386
+ "beat_mask_downbeats": False,
387
+ },
388
+ "beat-driven variation (downbeats only)": {
389
+ "periodic_p": 0,
390
+ "onset_mask_width": 0,
391
+ "beat_mask_width": 50,
392
+ "beat_mask_downbeats": True,
393
+ },
394
+ "beat-driven variation (downbeats only, strong)": {
395
+ "periodic_p": 0,
396
+ "onset_mask_width": 0,
397
+ "beat_mask_width": 20,
398
+ "beat_mask_downbeats": True,
399
+ },
400
+ }
401
+
402
+ preset = gr.Dropdown(
403
+ label="preset",
404
+ choices=list(presets.keys()),
405
+ value="strong periodic variation",
406
+ )
407
+ load_preset_button = gr.Button("load_preset")
408
+
409
+ with gr.Accordion("manual controls", open=True):
410
+ periodic_p = gr.Slider(
411
+ label="periodic prompt (0 - unconditional, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
412
+ minimum=0,
413
+ maximum=128,
414
+ step=1,
415
+ value=3,
416
+ )
417
+
418
+
419
+ onset_mask_width = gr.Slider(
420
+ label="onset mask width (multiplies with the periodic mask, 1 step ~= 10milliseconds) ",
421
+ minimum=0,
422
+ maximum=100,
423
+ step=1,
424
+ value=5,
425
+ )
426
+
427
+ beat_mask_width = gr.Slider(
428
+ label="beat prompt (ms)",
429
+ minimum=0,
430
+ maximum=200,
431
+ value=0,
432
+ )
433
+ beat_mask_downbeats = gr.Checkbox(
434
+ label="beat mask downbeats only?",
435
+ value=False
436
+ )
437
+
438
+ n_mask_codebooks = gr.Number(
439
+ label="first upper codebook level to mask",
440
+ value=9,
441
+ )
442
+
443
+
444
+ with gr.Accordion("extras ", open=False):
445
+ pitch_shift_amt = gr.Slider(
446
+ label="pitch shift amount (semitones)",
447
+ minimum=-12,
448
+ maximum=12,
449
+ step=1,
450
+ value=0,
451
+ )
452
+
453
+ rand_mask_intensity = gr.Slider(
454
+ label="random mask intensity. (If this is less than 1, scatters prompts throughout the audio, should be between 0.9 and 1.0)",
455
+ minimum=0.0,
456
+ maximum=1.0,
457
+ value=1.0
458
+ )
459
+
460
+ periodic_w = gr.Slider(
461
+ label="periodic prompt width (steps, 1 step ~= 10milliseconds)",
462
+ minimum=1,
463
+ maximum=20,
464
+ step=1,
465
+ value=1,
466
+ )
467
+ n_conditioning_codebooks = gr.Number(
468
+ label="number of conditioning codebooks. probably 0",
469
+ value=0,
470
+ precision=0,
471
+ )
472
+
473
+ stretch_factor = gr.Slider(
474
+ label="time stretch factor",
475
+ minimum=0,
476
+ maximum=64,
477
+ step=1,
478
+ value=1,
479
+ )
480
+
481
+ preset_outputs = {
482
+ periodic_p,
483
+ onset_mask_width,
484
+ beat_mask_width,
485
+ beat_mask_downbeats,
486
+ }
487
+
488
+ def load_preset(_preset):
489
+ return tuple(presets[_preset].values())
490
+
491
+ load_preset_button.click(
492
+ fn=load_preset,
493
+ inputs=[preset],
494
+ outputs=preset_outputs
495
+ )
496
+
497
+
498
+ with gr.Accordion("prefix/suffix prompts", open=False):
499
+ prefix_s = gr.Slider(
500
+ label="prefix hint length (seconds)",
501
+ minimum=0.0,
502
+ maximum=10.0,
503
+ value=0.0
504
+ )
505
+ suffix_s = gr.Slider(
506
+ label="suffix hint length (seconds)",
507
+ minimum=0.0,
508
+ maximum=10.0,
509
+ value=0.0
510
+ )
511
+
512
+ masktemp = gr.Slider(
513
+ label="mask temperature",
514
+ minimum=0.0,
515
+ maximum=100.0,
516
+ value=1.5
517
+ )
518
+ sampletemp = gr.Slider(
519
+ label="sample temperature",
520
+ minimum=0.1,
521
+ maximum=10.0,
522
+ value=1.0,
523
+ step=0.001
524
+ )
525
+
526
+
527
+
528
+ with gr.Accordion("sampling settings", open=False):
529
+ top_p = gr.Slider(
530
+ label="top p (0.0 = off)",
531
+ minimum=0.0,
532
+ maximum=1.0,
533
+ value=0.0
534
+ )
535
+ typical_filtering = gr.Checkbox(
536
+ label="typical filtering ",
537
+ value=False
538
+ )
539
+ typical_mass = gr.Slider(
540
+ label="typical mass (should probably stay between 0.1 and 0.5)",
541
+ minimum=0.01,
542
+ maximum=0.99,
543
+ value=0.15
544
+ )
545
+ typical_min_tokens = gr.Slider(
546
+ label="typical min tokens (should probably stay between 1 and 256)",
547
+ minimum=1,
548
+ maximum=256,
549
+ step=1,
550
+ value=64
551
+ )
552
+ sample_cutoff = gr.Slider(
553
+ label="sample cutoff",
554
+ minimum=0.0,
555
+ maximum=1.0,
556
+ value=0.5,
557
+ step=0.01
558
+ )
559
+
560
+ use_coarse2fine = gr.Checkbox(
561
+ label="use coarse2fine",
562
+ value=True,
563
+ visible=False
564
+ )
565
+
566
+ num_steps = gr.Slider(
567
+ label="number of steps (should normally be between 12 and 36)",
568
+ minimum=1,
569
+ maximum=128,
570
+ step=1,
571
+ value=36
572
+ )
573
+
574
+ dropout = gr.Slider(
575
+ label="mask dropout",
576
+ minimum=0.0,
577
+ maximum=1.0,
578
+ step=0.01,
579
+ value=0.0
580
+ )
581
+
582
+
583
+ seed = gr.Number(
584
+ label="seed (0 for random)",
585
+ value=0,
586
+ precision=0,
587
+ )
588
+
589
+
590
+
591
+ # mask settings
592
+ with gr.Column():
593
+
594
+ model_choice = gr.Dropdown(
595
+ label="model choice",
596
+ choices=list(MODEL_CHOICES.keys()),
597
+ value="default",
598
+ visible=True
599
+ )
600
+
601
+ vamp_button = gr.Button("generate (vamp)!!!")
602
+ output_audio = gr.Audio(
603
+ label="output audio",
604
+ interactive=False,
605
+ type="filepath"
606
+ )
607
+
608
+ notes_text = gr.Textbox(
609
+ label="type any notes about the generated audio here",
610
+ value="",
611
+ interactive=True
612
+ )
613
+ save_button = gr.Button("save vamp")
614
+ download_file = gr.File(
615
+ label="vamp to download will appear here",
616
+ interactive=False
617
+ )
618
+ use_as_input_button = gr.Button("use output as input")
619
+
620
+ thank_you = gr.Markdown("")
621
+
622
+
623
+ _inputs = {
624
+ input_audio,
625
+ num_steps,
626
+ masktemp,
627
+ sampletemp,
628
+ top_p,
629
+ prefix_s, suffix_s,
630
+ rand_mask_intensity,
631
+ periodic_p, periodic_w,
632
+ n_conditioning_codebooks,
633
+ dropout,
634
+ use_coarse2fine,
635
+ stretch_factor,
636
+ onset_mask_width,
637
+ typical_filtering,
638
+ typical_mass,
639
+ typical_min_tokens,
640
+ beat_mask_width,
641
+ beat_mask_downbeats,
642
+ seed,
643
+ model_choice,
644
+ n_mask_codebooks,
645
+ pitch_shift_amt,
646
+ sample_cutoff
647
+ }
648
+
649
+ # connect widgets
650
+ vamp_button.click(
651
+ fn=vamp,
652
+ inputs=_inputs,
653
+ outputs=[output_audio, audio_mask],
654
+ )
655
+
656
+ api_vamp_button = gr.Button("api vamp", visible=False)
657
+ api_vamp_button.click(
658
+ fn=api_vamp,
659
+ inputs=_inputs,
660
+ outputs=[output_audio],
661
+ api_name="vamp"
662
+ )
663
+
664
+ use_as_input_button.click(
665
+ fn=lambda x: x,
666
+ inputs=[output_audio],
667
+ outputs=[input_audio]
668
+ )
669
+
670
+ save_button.click(
671
+ fn=save_vamp,
672
+ inputs=_inputs | {notes_text, output_audio},
673
+ outputs=[thank_you, download_file]
674
+ )
675
+
676
+
677
+ demo.launch(share=True, debug=True)
678
+ demo.queue()
assets/example.wav ADDED
Binary file (883 kB). View file
 
conf/c2f.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/vampnet.yml
3
+
4
+ VampNet.n_codebooks: 14
5
+ VampNet.n_conditioning_codebooks: 4
6
+
7
+ VampNet.embedding_dim: 1280
8
+ VampNet.n_layers: 16
9
+ VampNet.n_heads: 20
10
+
11
+ AudioDataset.duration: 3.0
12
+
13
+
14
+ AudioDataset.loudness_cutoff: -40.0
conf/generated/bbc-humans/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/bbc-humans/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /home/hugo/Humans/
15
+ val/AudioLoader.sources: *id001
conf/generated/bbc-humans/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/bbc-humans/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /home/hugo/Humans/
8
+ val/AudioLoader.sources: *id001
conf/generated/bbc-humans/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /home/hugo/Humans/
3
+ Interface.coarse2fine_ckpt: ./runs/bbc-humans/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/bbc-humans/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/boleros/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/boleros/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/loras/boleros
15
+ val/AudioLoader.sources: *id001
conf/generated/boleros/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/boleros/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/loras/boleros
8
+ val/AudioLoader.sources: *id001
conf/generated/boleros/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/loras/boleros
3
+ Interface.coarse2fine_ckpt: ./runs/boleros/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/boleros/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/bowl/c2f.yml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/bowl/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/seagate_prosound/prosound_core_complete/Anns
15
+ - Animals
16
+ val/AudioLoader.sources: *id001
conf/generated/bowl/coarse.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/bowl/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/seagate_prosound/prosound_core_complete/Anns
8
+ - Animals
9
+ val/AudioLoader.sources: *id001
conf/generated/bowl/interface.yml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/seagate_prosound/prosound_core_complete/Anns
3
+ - Animals
4
+ Interface.coarse2fine_ckpt: ./runs/bowl/c2f/latest/vampnet/weights.pth
5
+ Interface.coarse_ckpt: ./runs/bowl/coarse/latest/vampnet/weights.pth
6
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
7
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/breaks-steps/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/machines
3
+ Interface.coarse2fine_ckpt: ./runs-june-23/breaks-steps/c2f/best/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs-june-23/breaks-steps/c2f/best/lora.pth
5
+ Interface.coarse_ckpt: ./runs-june-23/breaks-steps/coarse/best/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs-june-23/breaks-steps/coarse/best/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/choir/interface.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/knower
3
+ Interface.coarse2fine_ckpt: ./runs/choir/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/choir/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/choir/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/choir/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
9
+ Interface.coarse_chunk_size_s: 15
conf/generated/earlymachines/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/machines/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/machines
15
+ val/AudioLoader.sources: *id001
conf/generated/earlymachines/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/machines/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/machines
8
+ val/AudioLoader.sources: *id001
conf/generated/earlymachines/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/machines
3
+ Interface.coarse2fine_ckpt: ./runs/machines/c2f/best/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/machines/c2f/best/lora.pth
5
+ Interface.coarse_ckpt: ./runs/machines/coarse/best/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/machines/coarse/best/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/funk/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/knower/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/knower
15
+ val/AudioLoader.sources: *id001
conf/generated/funk/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/knower/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/knower
8
+ val/AudioLoader.sources: *id001
conf/generated/funk/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/knower
3
+ Interface.coarse2fine_ckpt: ./runs/knower/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/knower/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/knower/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/knower/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/ismir-birds/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/ismir-birds/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/ismir-23-cfm/xeno-canto-augmented-filtered
15
+ val/AudioLoader.sources: *id001
conf/generated/ismir-birds/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/ismir-birds/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/ismir-23-cfm/xeno-canto-augmented-filtered
8
+ val/AudioLoader.sources: *id001
conf/generated/ismir-birds/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/ismir-23-cfm/xeno-canto-augmented-filtered
3
+ Interface.coarse2fine_ckpt: ./runs/ismir-birds/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/ismir-birds/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/ismir-birds/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/ismir-birds/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/ismir-machines/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/ismir-machines/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/ismir-23-cfm/machines-augmented-filtered
15
+ val/AudioLoader.sources: *id001
conf/generated/ismir-machines/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/ismir-machines/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/ismir-23-cfm/machines-augmented-filtered
8
+ val/AudioLoader.sources: *id001
conf/generated/ismir-machines/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/ismir-23-cfm/machines-augmented-filtered
3
+ Interface.coarse2fine_ckpt: ./runs/ismir-machines/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/ismir-machines/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/ismir-machines/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/ismir-machines/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/machines/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/machines/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/machines
15
+ val/AudioLoader.sources: *id001
conf/generated/machines/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/machines/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/machines
8
+ val/AudioLoader.sources: *id001
conf/generated/machines/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK/hugo/machines
3
+ Interface.coarse2fine_ckpt: ./runs/machines/c2f/best/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/machines/c2f/best/lora.pth
5
+ Interface.coarse_ckpt: ./runs/machines/coarse/best/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/machines/coarse/best/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/musdb/c2f.yml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/xeno-canto/c2f
13
+
14
+ AudioDataset.aligned: true
15
+ train/build_dataset.folders:
16
+ bass:
17
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
18
+ ext: "bass.wav"
19
+ drums:
20
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
21
+ ext: "drums.wav"
22
+ other:
23
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
24
+ ext: "other.wav"
25
+ vocals:
26
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train]
27
+ ext: "vocals.wav"
28
+ val/build_dataset.folders:
29
+ bass:
30
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
31
+ ext: "bass.wav"
32
+ drums:
33
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
34
+ ext: "drums.wav"
35
+ other:
36
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
37
+ ext: "other.wav"
38
+ vocals:
39
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/val]
40
+ ext: "vocals.wav"
conf/generated/musdb/coarse.yml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/xeno-canto/coarse
6
+ train/build_dataset.folders:
7
+ bass:
8
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
9
+ ext: ["bass.wav"]
10
+ drums:
11
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
12
+ ext: ["drums.wav"]
13
+ other:
14
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
15
+ ext: ["other.wav"]
16
+ vocals:
17
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/train/]
18
+ ext: ["vocals.wav"]
19
+ val/build_dataset.folders:
20
+ bass:
21
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
22
+ ext: ["bass.wav"]
23
+ drums:
24
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
25
+ ext: ["drums.wav"]
26
+ other:
27
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
28
+ ext: ["other.wav"]
29
+ vocals:
30
+ sources: [/media/CHONK/hugo/ial-datasets/musdb18hq/test/]
31
+ ext: ["vocals.wav"]
conf/generated/musdb/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ Interface.coarse2fine_ckpt: ./models/vampnet/c2f.pth
2
+ Interface.coarse2fine_lora_ckpt: null
3
+
4
+ Interface.coarse_ckpt: ./runs/musdb-cond-clfdrop/best/vampnet/weights.pth
5
+ Interface.coarse_lora_ckpt: null
6
+
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/n64/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/n64/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - data/salad-bowl/n64-jungle/n64-jungle-mix.wav
15
+ val/AudioLoader.sources: *id001
conf/generated/n64/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/n64/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - data/salad-bowl/n64-jungle/n64-jungle-mix.wav
8
+ val/AudioLoader.sources: *id001
conf/generated/n64/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - data/salad-bowl/n64-jungle/n64-jungle-mix.wav
3
+ Interface.coarse2fine_ckpt: ./runs/n64/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/n64/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/natural-sounds/c2f.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/soundrangers-v2/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK2/prosound_redacted/Soundrangers Complete
15
+ - /media/CHONK2/prosound_redacted/Soundrangers Update 2018
16
+ - /media/CHONK2/prosound_redacted/BBC Nature Sound Effects Library/Animals
17
+ - /media/CHONK2/prosound_redacted/BBC Nature Sound Effects Library/Birds
18
+ - /media/CHONK2/prosound_redacted/BBC Historical and 1-166 Sound Effects Library/Foley
19
+ - /media/CHONK2/prosound_redacted/BBC Historical and 1-166 Sound Effects Library/Musical
20
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Dogs
21
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Farm
22
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Horses
23
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Rodents
24
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Wild
25
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Bells
26
+ - /media/CHONK2/prosound_redacted/King Collection - Volume 1/Musical - Chimes
27
+ - /media/CHONK2/prosound_redacted/King Collection - Volume 1/Musical - Instruments
28
+ val/AudioLoader.sources: *id001
conf/generated/natural-sounds/coarse.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/soundrangers-v2/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK2/prosound_redacted/Soundrangers Complete
8
+ - /media/CHONK2/prosound_redacted/Soundrangers Update 2018
9
+ - /media/CHONK2/prosound_redacted/BBC Nature Sound Effects Library/Animals
10
+ - /media/CHONK2/prosound_redacted/BBC Nature Sound Effects Library/Birds
11
+ - /media/CHONK2/prosound_redacted/BBC Historical and 1-166 Sound Effects Library/Foley
12
+ - /media/CHONK2/prosound_redacted/BBC Historical and 1-166 Sound Effects Library/Musical
13
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Dogs
14
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Farm
15
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Horses
16
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Rodents
17
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Mammals - Wild
18
+ - /media/CHONK2/prosound_redacted/Big Room Complete/Bells
19
+ - /media/CHONK2/prosound_redacted/King Collection - Volume 1/Musical - Chimes
20
+ - /media/CHONK2/prosound_redacted/King Collection - Volume 1/Musical - Instruments
21
+ val/AudioLoader.sources: *id001
conf/generated/natural-sounds/interface.yml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - /media/CHONK2/prosound_redacted/Soundrangers
3
+ - Complete
4
+ Interface.coarse2fine_ckpt: ./runs/soundrangers-v2/c2f/latest/vampnet/weights.pth
5
+ Interface.coarse_ckpt: ./runs/soundrangers-v2/coarse/latest/vampnet/weights.pth
6
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
7
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/nes/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/nes/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - data/salad-bowl/chiptune/nes.wav
15
+ val/AudioLoader.sources: *id001
conf/generated/nes/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/nes/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - data/salad-bowl/chiptune/nes.wav
8
+ val/AudioLoader.sources: *id001
conf/generated/nes/interface.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - - data/salad-bowl/chiptune/nes.wav
3
+ Interface.coarse2fine_ckpt: ./runs/nes/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse_ckpt: ./runs/nes/coarse/latest/vampnet/weights.pth
5
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
6
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/nyc-subway/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/nyc-subway/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/nyc-subway
15
+ val/AudioLoader.sources: *id001
conf/generated/nyc-subway/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/nyc-subway/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/nyc-subway
8
+ val/AudioLoader.sources: *id001
conf/generated/nyc-subway/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - /media/CHONK/hugo/nyc-subway
3
+ Interface.coarse2fine_ckpt: ./runs/nyc-subway/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/nyc-subway/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/nyc-subway/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/nyc-subway/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth
conf/generated/ocean-waves/c2f.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ AudioDataset.duration: 3.0
4
+ AudioDataset.loudness_cutoff: -40.0
5
+ VampNet.embedding_dim: 1280
6
+ VampNet.n_codebooks: 14
7
+ VampNet.n_conditioning_codebooks: 4
8
+ VampNet.n_heads: 20
9
+ VampNet.n_layers: 16
10
+ fine_tune: true
11
+ fine_tune_checkpoint: ./models/vampnet/c2f.pth
12
+ save_path: ./runs/ocean-waves/c2f
13
+ train/AudioLoader.sources: &id001
14
+ - /media/CHONK/hugo/ocean-waves-sounds
15
+ val/AudioLoader.sources: *id001
conf/generated/ocean-waves/coarse.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ $include:
2
+ - conf/lora/lora.yml
3
+ fine_tune: true
4
+ fine_tune_checkpoint: ./models/vampnet/coarse.pth
5
+ save_path: ./runs/ocean-waves/coarse
6
+ train/AudioLoader.sources: &id001
7
+ - /media/CHONK/hugo/ocean-waves-sounds
8
+ val/AudioLoader.sources: *id001
conf/generated/ocean-waves/interface.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ AudioLoader.sources:
2
+ - /media/CHONK/hugo/ocean-waves-sounds.mp3
3
+ Interface.coarse2fine_ckpt: ./runs/ocean-waves/c2f/latest/vampnet/weights.pth
4
+ Interface.coarse2fine_lora_ckpt: ./runs/ocean-waves/c2f/latest/lora.pth
5
+ Interface.coarse_ckpt: ./runs/ocean-waves/coarse/latest/vampnet/weights.pth
6
+ Interface.coarse_lora_ckpt: ./runs/ocean-waves/coarse/latest/lora.pth
7
+ Interface.codec_ckpt: ./models/vampnet/codec.pth
8
+ Interface.wavebeat_ckpt: ./models/wavebeat.pth