publish step 50,000 (Soundboard)
Browse files- LICENSE.NSCL-A2SB +64 -0
- LICENSE.PolyForm-NC +73 -0
- LICENSING.md +73 -0
- README.md +157 -0
- config.json +66 -0
- model.pt +3 -0
- profile.json +16 -0
LICENSE.NSCL-A2SB
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
NVIDIA Source Code License for A2SB
|
| 2 |
+
|
| 3 |
+
1. Definitions
|
| 4 |
+
|
| 5 |
+
“Licensor” means any person or entity that distributes its Work.
|
| 6 |
+
|
| 7 |
+
“Software” means the original work of authorship made available under this License.
|
| 8 |
+
|
| 9 |
+
“Work” means the Software and any additions to or derivative works of the Software that are made available under
|
| 10 |
+
this License.
|
| 11 |
+
|
| 12 |
+
The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under
|
| 13 |
+
U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include
|
| 14 |
+
works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work.
|
| 15 |
+
|
| 16 |
+
Works, including the Software, are “made available” under this License by including in or with the Work either
|
| 17 |
+
(a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License.
|
| 18 |
+
|
| 19 |
+
2. License Grant
|
| 20 |
+
|
| 21 |
+
2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual,
|
| 22 |
+
worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly
|
| 23 |
+
display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form.
|
| 24 |
+
|
| 25 |
+
3. Limitations
|
| 26 |
+
|
| 27 |
+
3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you
|
| 28 |
+
include a complete copy of this License with your distribution, and (c) you retain without modification any
|
| 29 |
+
copyright, patent, trademark, or attribution notices that are present in the Work.
|
| 30 |
+
|
| 31 |
+
3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and
|
| 32 |
+
distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use
|
| 33 |
+
limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works
|
| 34 |
+
that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution
|
| 35 |
+
requirements in Section 3.1) will continue to apply to the Work itself.
|
| 36 |
+
|
| 37 |
+
3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use
|
| 38 |
+
non-commercially. Notwithstanding the foregoing, NVIDIA and its affiliates may use the Work and any derivative
|
| 39 |
+
works commercially. As used herein, “non-commercially” means for research or evaluation purposes only.
|
| 40 |
+
|
| 41 |
+
3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim,
|
| 42 |
+
cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then
|
| 43 |
+
your rights under this License from such Licensor (including the grant in Section 2.1) will terminate immediately.
|
| 44 |
+
|
| 45 |
+
3.5 Trademarks. This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos,
|
| 46 |
+
or trademarks, except as necessary to reproduce the notices described in this License.
|
| 47 |
+
|
| 48 |
+
3.6 Termination. If you violate any term of this License, then your rights under this License (including the
|
| 49 |
+
grant in Section 2.1) will terminate immediately.
|
| 50 |
+
|
| 51 |
+
4. Disclaimer of Warranty.
|
| 52 |
+
|
| 53 |
+
THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
|
| 54 |
+
WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU
|
| 55 |
+
BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE.
|
| 56 |
+
|
| 57 |
+
5. Limitation of Liability.
|
| 58 |
+
|
| 59 |
+
EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING
|
| 60 |
+
NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
|
| 61 |
+
INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR
|
| 62 |
+
INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR
|
| 63 |
+
DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN
|
| 64 |
+
ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
|
LICENSE.PolyForm-NC
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PolyForm Noncommercial License 1.0.0
|
| 2 |
+
|
| 3 |
+
<https://polyformproject.org/licenses/noncommercial/1.0.0>
|
| 4 |
+
|
| 5 |
+
## Acceptance
|
| 6 |
+
|
| 7 |
+
In order to get any license under these terms, you must agree to them as both strict obligations and conditions to all your licenses.
|
| 8 |
+
|
| 9 |
+
## Copyright License
|
| 10 |
+
|
| 11 |
+
The licensor grants you a copyright license for the software to do everything you might do with the software that would otherwise infringe the licensor's copyright in it for any permitted purpose. However, you may only distribute the software according to [Distribution License](#distribution-license) and make changes or new works based on the software according to [Changes and New Works License](#changes-and-new-works-license).
|
| 12 |
+
|
| 13 |
+
## Distribution License
|
| 14 |
+
|
| 15 |
+
The licensor grants you an additional copyright license to distribute copies of the software. Your license to distribute covers distributing the software with changes and new works permitted by [Changes and New Works License](#changes-and-new-works-license).
|
| 16 |
+
|
| 17 |
+
## Notices
|
| 18 |
+
|
| 19 |
+
You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms or the URL for them above, as well as copies of any plain-text lines beginning with `Required Notice:` that the licensor provided with the software. For example:
|
| 20 |
+
|
| 21 |
+
> Required Notice: Copyright Yoyodyne, Inc. (http://example.com)
|
| 22 |
+
|
| 23 |
+
## Changes and New Works License
|
| 24 |
+
|
| 25 |
+
The licensor grants you an additional copyright license to make changes and new works based on the software for any permitted purpose.
|
| 26 |
+
|
| 27 |
+
## Patent License
|
| 28 |
+
|
| 29 |
+
The licensor grants you a patent license for the software that covers patent claims the licensor can license, or becomes able to license, that you would infringe by using the software.
|
| 30 |
+
|
| 31 |
+
## Noncommercial Purposes
|
| 32 |
+
|
| 33 |
+
Any noncommercial purpose is a permitted purpose.
|
| 34 |
+
|
| 35 |
+
## Personal Uses
|
| 36 |
+
|
| 37 |
+
Personal use for research, experiment, and testing for the benefit of public knowledge, personal study, private entertainment, hobby projects, amateur pursuits, or religious observance, without any anticipated commercial application, is use for a permitted purpose.
|
| 38 |
+
|
| 39 |
+
## Noncommercial Organizations
|
| 40 |
+
|
| 41 |
+
Use by any charitable organization, educational institution, public research organization, public safety or health organization, environmental protection organization, or government institution is use for a permitted purpose regardless of the source of funding or obligations resulting from the funding.
|
| 42 |
+
|
| 43 |
+
## Fair Use
|
| 44 |
+
|
| 45 |
+
You may have "fair use" rights for the software under the law. These terms do not limit them.
|
| 46 |
+
|
| 47 |
+
## No Other Rights
|
| 48 |
+
|
| 49 |
+
These terms do not allow you to sublicense or transfer any of your licenses to anyone else, or prevent the licensor from granting licenses to anyone else. These terms do not imply any other licenses.
|
| 50 |
+
|
| 51 |
+
## Patent Defense
|
| 52 |
+
|
| 53 |
+
If you make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
|
| 54 |
+
|
| 55 |
+
## Violations
|
| 56 |
+
|
| 57 |
+
The first time you are notified in writing that you have violated any of these terms, or done anything with the software not covered by your licenses, your licenses can nonetheless continue if you come into full compliance with these terms, and take practical steps to correct past violations, within 32 days of receiving notice. Otherwise, all your licenses end immediately.
|
| 58 |
+
|
| 59 |
+
## No Liability
|
| 60 |
+
|
| 61 |
+
***As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.***
|
| 62 |
+
|
| 63 |
+
## Definitions
|
| 64 |
+
|
| 65 |
+
The **licensor** is the individual or entity offering these terms, and the **software** is the software the licensor makes available under these terms.
|
| 66 |
+
|
| 67 |
+
**You** refers to the individual or entity agreeing to these terms.
|
| 68 |
+
|
| 69 |
+
**Your company** is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. **Control** means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
|
| 70 |
+
|
| 71 |
+
**Your licenses** are all the licenses granted to you for the software under these terms.
|
| 72 |
+
|
| 73 |
+
**Use** means anything you do with the software requiring one of your licenses.
|
LICENSING.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Licensing
|
| 2 |
+
|
| 3 |
+
This project is released under a **dual non-commercial license**:
|
| 4 |
+
|
| 5 |
+
- [`LICENSE.NSCL-A2SB`](LICENSE.NSCL-A2SB) — NVIDIA Source Code License for A2SB (the upstream license we inherit)
|
| 6 |
+
- [`LICENSE.PolyForm-NC`](LICENSE.PolyForm-NC) — PolyForm Noncommercial 1.0.0 (our additional terms)
|
| 7 |
+
|
| 8 |
+
You must comply with **both** licenses when using this work.
|
| 9 |
+
|
| 10 |
+
## What you can do
|
| 11 |
+
|
| 12 |
+
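- Use it for **research or evaluation purposes** — the NSCL-A2SB defines “non-commercially” as research or evaluation only, and because both licenses apply, that narrower definition governs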
|
| 13 |
+
- Modify it, study it, share modifications under the same terms
|
| 14 |
+
- Cite, reproduce, and discuss it in academic or technical writing
|
| 15 |
+
|
| 16 |
+
## What you cannot do
|
| 17 |
+
|
| 18 |
+
- **Use it commercially.** This includes:
|
| 19 |
+
- Selling the model, the code, or access to either
|
| 20 |
+
- Including it in a paid product or paid service
|
| 21 |
+
- Running it as part of a monetized API, SaaS, or hosted offering
|
| 22 |
+
- Internal use within an organization for revenue-generating activities
|
| 23 |
+
- Use NVIDIA's name, logos, or trademarks in any derivative
|
| 24 |
+
|
| 25 |
+
If you want commercial use, you'd need a separate commercial license from NVIDIA
|
| 26 |
+
(for the A2SB-derived parts) and from us (for the rest). We won't pretend
|
| 27 |
+
otherwise.
|
| 28 |
+
|
| 29 |
+
## Why two licenses
|
| 30 |
+
|
| 31 |
+
The model and parts of the codebase are derivative works of NVIDIA's A2SB
|
| 32 |
+
release, which is governed by the NVIDIA Source Code License for A2SB. That
|
| 33 |
+
license restricts use to non-commercial purposes and we cannot relax that
|
| 34 |
+
restriction.
|
| 35 |
+
|
| 36 |
+
PolyForm Noncommercial 1.0.0 layers on top with stricter, software-specific
|
| 37 |
+
terms — an explicit patent license, a 32-day cure period for violations, clean
|
| 38 |
+
disclaimers — that the NVIDIA license itself doesn't impose. Both licenses
|
| 39 |
+
restrict use to non-commercial purposes, so they stack without contradiction.
|
| 40 |
+
|
| 41 |
+
## What's covered by what
|
| 42 |
+
|
| 43 |
+
| Component | Why it's NSCL-A2SB-bound | Also under PolyForm-NC |
|
| 44 |
+
|---|---|---|
|
| 45 |
+
| `training/locutius_train/network.py` | Verbatim port of NVIDIA's `networks.py` | yes |
|
| 46 |
+
| `training/locutius_train/diffusion.py` | Verbatim port of NVIDIA's `diffusion.py` | yes |
|
| 47 |
+
| Trained model checkpoints | Derivative of NVIDIA-released A2SB weights | yes |
|
| 48 |
+
| `corruption-chain/` (the synthesis/profiling package) | Original work, no NVIDIA code | yes |
|
| 49 |
+
| `training/main.py`, `config.py`, `data.py`, `trainer.py`, `restore.py`, `representation.py`, `utils.py`, `checkpoints.py` | Original work | yes |
|
| 50 |
+
| Profiles (`corruption-chain/profiles/*.json`) | Original measurements | yes |
|
| 51 |
+
| Documentation (`README.md`, `PLAN.md`, etc.) | Original | yes |
|
| 52 |
+
|
| 53 |
+
For the A2SB-derived portions, NVIDIA's copyright headers must remain intact
|
| 54 |
+
when you redistribute. We retain them in the source files.
|
| 55 |
+
|
| 56 |
+
## Practical examples
|
| 57 |
+
|
| 58 |
+
| Use case | Allowed? |
|
| 59 |
+
|---|---|
|
| 60 |
+
| Training your own variant on personal music for personal listening | ✅ |
|
| 61 |
+
| Publishing a research paper that uses this model | ✅ (cite it) |
|
| 62 |
+
| Sharing the code/model on GitHub or HuggingFace under the same dual license | ✅ |
|
| 63 |
+
| Using restored audio you made with the model in a commercial release | ❌ (the *output* is a derivative of the model under NSCL §3.3) |
|
| 64 |
+
| Selling a "festival audio cleanup" service powered by this model | ❌ |
|
| 65 |
+
| A streaming platform integrating this to clean audience-recorded uploads | ❌ |
|
| 66 |
+
| Internal tool inside a record label to triage live recordings before commercial release | ❌ (revenue-adjacent) |
|
| 67 |
+
| University course using it as a teaching example | ✅ |
|
| 68 |
+
|
| 69 |
+
## Not legal advice
|
| 70 |
+
|
| 71 |
+
The licenses themselves are the controlling documents — this file just
|
| 72 |
+
summarizes our reading. We are not lawyers. If you plan to do anything
|
| 73 |
+
non-trivial with this work, talk to one.
|
README.md
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: other
|
| 3 |
+
license_name: nscl-a2sb-and-polyform-nc
|
| 4 |
+
license_link: ./LICENSE.NSCL-A2SB
|
| 5 |
+
tags:
|
| 6 |
+
- audio
|
| 7 |
+
- audio-restoration
|
| 8 |
+
- schrodinger-bridge
|
| 9 |
+
- diffusion
|
| 10 |
+
- festival-audio
|
| 11 |
+
- non-commercial
|
| 12 |
+
library_name: pytorch
|
| 13 |
+
pipeline_tag: audio-to-audio
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
# Soundboard
|
| 17 |
+
|
| 18 |
+
Schrödinger Bridge denoiser fine-tuned for festival audio restoration —
|
| 19 |
+
recovers a soundboard-style mix from heavily-corrupted audience recordings
|
| 20 |
+
(room reverb + audience-mic blend + lossy codec artifacts).
|
| 21 |
+
|
| 22 |
+
Fine-tuned from NVIDIA's
|
| 23 |
+
[A2SB](https://huggingface.co/nvidia/audio_to_audio_schrodinger_bridge)
|
| 24 |
+
(`twosplit_0.5_1.0` split) on a synthetic-corruption training pipeline driven
|
| 25 |
+
by **profile-based augmentation** — corruption parameters are calibrated
|
| 26 |
+
from real (clean, festival-recording) pairs and sampled at training time
|
| 27 |
+
from the recovered distribution. See [Locutius](https://github.com/protodotdesign/locutius)
|
| 28 |
+
for the full corruption chain, profiling, and training scaffold.
|
| 29 |
+
|
| 30 |
+
## Quick facts
|
| 31 |
+
|
| 32 |
+
| | |
|
| 33 |
+
|---|---|
|
| 34 |
+
| Architecture | AttnUNetF (565.5M params) |
|
| 35 |
+
| Audio format | 44.1 kHz, 2-channel, 32-bit float |
|
| 36 |
+
| Segment length | 130560 samples (2.96 s) |
|
| 37 |
+
| STFT | n_fft=2048, hop=512, window=hann |
|
| 38 |
+
| Representation | 3-channel `[mag^0.25, cos(phase), sin(phase)]` |
|
| 39 |
+
| Trained at step | 50,000 |
|
| 40 |
+
| Base checkpoint | NVIDIA A2SB `twosplit_0.5_1.0` |
|
| 41 |
+
| Checkpoint size | 2.26 GB (2.11 GiB) |
|
| 42 |
+
| Diffusion | Schrödinger Bridge, β_max=1.0 |
|
| 43 |
+
|
| 44 |
+
## Usage
|
| 45 |
+
|
| 46 |
+
Load with the [Locutius](https://github.com/protodotdesign/locutius)
|
| 47 |
+
training package:
|
| 48 |
+
|
| 49 |
+
```python
|
| 50 |
+
import torch
|
| 51 |
+
from huggingface_hub import hf_hub_download
|
| 52 |
+
from locutius_train.config import TrainConfig
|
| 53 |
+
from locutius_train.network import AttnUNetF, SinusoidalTemporalEmbedding
|
| 54 |
+
from locutius_train.diffusion import Diffusion
|
| 55 |
+
from locutius_train.representation import WaveformToInput, InputToWaveform
|
| 56 |
+
from locutius_train.restore import restore_spectrogram
|
| 57 |
+
|
| 58 |
+
ckpt_path = hf_hub_download(repo_id="protodotdesign/Soundboard", filename="model.pt")
|
| 59 |
+
sd = torch.load(ckpt_path, map_location="cuda", weights_only=False)
|
| 60 |
+
|
| 61 |
+
cfg = TrainConfig()
|
| 62 |
+
model = AttnUNetF(
|
| 63 |
+
n_updown_levels=cfg.model.n_updown_levels,
|
| 64 |
+
in_channels=cfg.model.in_channels,
|
| 65 |
+
hidden_channels=list(cfg.model.hidden_channels),
|
| 66 |
+
out_channels=cfg.model.out_channels,
|
| 67 |
+
emb_channels=cfg.diffusion.n_timestep_channels,
|
| 68 |
+
band_embedding_dim=cfg.model.band_embedding_dim,
|
| 69 |
+
n_attn_heads=cfg.model.n_attn_heads,
|
| 70 |
+
attention_levels=list(cfg.model.attention_levels),
|
| 71 |
+
use_attn_input_norm=cfg.model.use_attn_input_norm,
|
| 72 |
+
num_res_blocks=cfg.model.num_res_blocks,
|
| 73 |
+
).to("cuda").eval()
|
| 74 |
+
model.load_state_dict(sd["model"])
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
See `restore.py` in the Locutius repo for a complete CLI that takes a
|
| 78 |
+
clean source, applies the calibrated festival-corruption profile, and
|
| 79 |
+
runs the reverse Schrödinger Bridge to produce a restored output.
|
| 80 |
+
|
| 81 |
+
## Calibrated corruption profile
|
| 82 |
+
|
| 83 |
+
This model was trained against a single calibrated profile recovered
|
| 84 |
+
from a real (studio FLAC, festival M4A) pair via per-kick local
|
| 85 |
+
Wiener deconvolution. The profile is bundled in `profile.json`:
|
| 86 |
+
|
| 87 |
+
```json
|
| 88 |
+
{
|
| 89 |
+
"name": "edc_festival",
|
| 90 |
+
"ir_path": "../impulses/EchoThief/Brutalism/San Diego Supercomputer Center Outdoor Patio California.wav",
|
| 91 |
+
"delay_ms_range": [
|
| 92 |
+
15.0,
|
| 93 |
+
25.0
|
| 94 |
+
],
|
| 95 |
+
"studio_gain_range": [
|
| 96 |
+
0.6,
|
| 97 |
+
0.7
|
| 98 |
+
],
|
| 99 |
+
"room_gain_range": [
|
| 100 |
+
0.55,
|
| 101 |
+
0.65
|
| 102 |
+
]
|
| 103 |
+
}
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
Each training-step corruption draws fresh values from these ranges,
|
| 107 |
+
so the model has been exposed to ~50,000 distinct delay/blend
|
| 108 |
+
combinations within the same venue character.
|
| 109 |
+
|
| 110 |
+
## Training data
|
| 111 |
+
|
| 112 |
+
Trained on a focused subset of electronic music FLACs. **No festival
|
| 113 |
+
recordings or other licensed audio were stored or distributed** —
|
| 114 |
+
only the studio source material was used; festival-corrupted versions
|
| 115 |
+
were synthesized on-the-fly from the calibrated profile during each
|
| 116 |
+
training step.
|
| 117 |
+
|
| 118 |
+
## Limitations
|
| 119 |
+
|
| 120 |
+
- **Single profile**: trained against one calibrated venue (`edc_festival`).
|
| 121 |
+
Performance on festival recordings from very different venues / mix
|
| 122 |
+
chains will degrade.
|
| 123 |
+
- **Electronic music bias**: training set was EDM-heavy. Restoration
|
| 124 |
+
quality on rock, classical, or vocal-led material may be uneven.
|
| 125 |
+
- **No crowd-noise model**: the calibrated profile didn't include
|
| 126 |
+
additive crowd-noise (no real crowd recordings were available
|
| 127 |
+
during calibration). Recordings with heavy crowd vocals may have
|
| 128 |
+
residual artifacts.
|
| 129 |
+
- **Non-commercial use only** — see the license below.
|
| 130 |
+
|
| 131 |
+
## License
|
| 132 |
+
|
| 133 |
+
Dual non-commercial license:
|
| 134 |
+
|
| 135 |
+
- [NVIDIA Source Code License for A2SB](LICENSE.NSCL-A2SB) (the upstream
|
| 136 |
+
license inherited from the A2SB base checkpoint)
|
| 137 |
+
- [PolyForm Noncommercial 1.0.0](LICENSE.PolyForm-NC) (additional terms
|
| 138 |
+
on top: notice requirements + patent retaliation)
|
| 139 |
+
|
| 140 |
+
You must comply with **both** licenses. Use is restricted to research
|
| 141 |
+
and evaluation only — no commercial use is permitted. See
|
| 142 |
+
[LICENSING.md](https://github.com/protodotdesign/locutius/blob/main/LICENSING.md)
|
| 143 |
+
for the full plain-English breakdown.
|
| 144 |
+
|
| 145 |
+
## Citation
|
| 146 |
+
|
| 147 |
+
If you use this model in research, please cite the upstream A2SB paper
|
| 148 |
+
and reference this fine-tune:
|
| 149 |
+
|
| 150 |
+
```bibtex
|
| 151 |
+
@misc{soundboard,
|
| 152 |
+
title={Soundboard: festival audio restoration via profile-calibrated Schrödinger Bridge fine-tuning},
|
| 153 |
+
author={Locutius},
|
| 154 |
+
year={2026},
|
| 155 |
+
howpublished={\url{https://huggingface.co/protodotdesign/Soundboard}},
|
| 156 |
+
}
|
| 157 |
+
```
|
config.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": {
|
| 3 |
+
"n_updown_levels": 5,
|
| 4 |
+
"in_channels": 3,
|
| 5 |
+
"out_channels": 3,
|
| 6 |
+
"hidden_channels": [
|
| 7 |
+
128,
|
| 8 |
+
256,
|
| 9 |
+
512,
|
| 10 |
+
768,
|
| 11 |
+
1024,
|
| 12 |
+
2048
|
| 13 |
+
],
|
| 14 |
+
"emb_channels": 128,
|
| 15 |
+
"band_embedding_dim": 16,
|
| 16 |
+
"n_attn_heads": 8,
|
| 17 |
+
"attention_levels": [
|
| 18 |
+
3,
|
| 19 |
+
4
|
| 20 |
+
],
|
| 21 |
+
"use_attn_input_norm": true,
|
| 22 |
+
"num_res_blocks": 2
|
| 23 |
+
},
|
| 24 |
+
"diffusion": {
|
| 25 |
+
"beta_min": 0.0001,
|
| 26 |
+
"beta_max": 1.0,
|
| 27 |
+
"use_ot_ode": false,
|
| 28 |
+
"n_timestep_channels": 128
|
| 29 |
+
},
|
| 30 |
+
"data": {
|
| 31 |
+
"music_root": "/workspace/music",
|
| 32 |
+
"profiles_dir": "../corruption-chain/profiles",
|
| 33 |
+
"impulses_root": "../corruption-chain/impulses",
|
| 34 |
+
"crowd_root": null,
|
| 35 |
+
"profile_weight": 0.85,
|
| 36 |
+
"skip_unsupported_modes": true,
|
| 37 |
+
"val_fraction": 0.02,
|
| 38 |
+
"passes": 4,
|
| 39 |
+
"exclude_dirs": []
|
| 40 |
+
},
|
| 41 |
+
"optim": {
|
| 42 |
+
"lr": 1e-05,
|
| 43 |
+
"weight_decay": 0.0,
|
| 44 |
+
"grad_clip": 0.5,
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
]
|
| 49 |
+
},
|
| 50 |
+
"output_dir": "runs/big-run-50k",
|
| 51 |
+
"checkpoint_in": "twosplit_0.5_1.0",
|
| 52 |
+
"t_range": [
|
| 53 |
+
0.0,
|
| 54 |
+
1.0
|
| 55 |
+
],
|
| 56 |
+
"seed": 42,
|
| 57 |
+
"max_steps": 50000,
|
| 58 |
+
"val_every_steps": 1000,
|
| 59 |
+
"save_every_steps": 1000,
|
| 60 |
+
"log_every_steps": 10,
|
| 61 |
+
"keep_last_checkpoints": 3,
|
| 62 |
+
"batch_size": 2,
|
| 63 |
+
"num_workers": 4,
|
| 64 |
+
"precision": "bf16",
|
| 65 |
+
"device": "cuda"
|
| 66 |
+
}
|
model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69ef36a810a27a05cf79237858702ae9039e54d8474e5ec016b2f516066608a9
|
| 3 |
+
size 2262278487
|
profile.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "edc_festival",
|
| 3 |
+
"ir_path": "../impulses/EchoThief/Brutalism/San Diego Supercomputer Center Outdoor Patio California.wav",
|
| 4 |
+
"delay_ms_range": [
|
| 5 |
+
15.0,
|
| 6 |
+
25.0
|
| 7 |
+
],
|
| 8 |
+
"studio_gain_range": [
|
| 9 |
+
0.6,
|
| 10 |
+
0.7
|
| 11 |
+
],
|
| 12 |
+
"room_gain_range": [
|
| 13 |
+
0.55,
|
| 14 |
+
0.65
|
| 15 |
+
]
|
| 16 |
+
}
|